Nathan Butters committed on
Commit
03287bc
1 Parent(s): 00378a4

Add all files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .DS_Store +0 -0
  2. Assets/.DS_Store +0 -0
  3. Assets/.ipynb_checkpoints/countries-checkpoint.csv +195 -0
  4. Assets/Countries/.ipynb_checkpoints/Country-Data-Origin-checkpoint.md +4 -0
  5. Assets/Countries/.ipynb_checkpoints/clean-countries-checkpoint.ipynb +2273 -0
  6. Assets/Countries/.ipynb_checkpoints/combined-countries-checkpoint.csv +167 -0
  7. Assets/Countries/.ipynb_checkpoints/countries-checkpoint.csv +195 -0
  8. Assets/Countries/Country-Data-Origin.md +4 -0
  9. Assets/Countries/DataPanelWHR2021C2.xls +0 -0
  10. Assets/Countries/clean-countries.ipynb +2273 -0
  11. Assets/Countries/combined-countries.csv +198 -0
  12. Assets/Countries/countries.csv +195 -0
  13. Assets/IMC Expansion/US Protected Classes from IMC.csv +74 -0
  14. Assets/Professions/.ipynb_checkpoints/Standard_Occupational_Classifications_Orgin-checkpoint.md +9 -0
  15. Assets/Professions/.ipynb_checkpoints/clean-SOC-2018-checkpoint.ipynb +558 -0
  16. Assets/Professions/.ipynb_checkpoints/soc-professions-2018-checkpoint.csv +0 -0
  17. Assets/Professions/.ipynb_checkpoints/soc_2018_direct_match_title_file-checkpoint.csv +0 -0
  18. Assets/Professions/Standard_Occupational_Classifications_Orgin.md +9 -0
  19. Assets/Professions/clean-SOC-2018.ipynb +558 -0
  20. Assets/Professions/soc-professions-2018.csv +0 -0
  21. Assets/Professions/soc_2018_direct_match_title_file.csv +0 -0
  22. Assets/Professions/soc_2018_direct_match_title_file.xlsx +0 -0
  23. Assets/Professions/soc_structure_2018.xlsx +0 -0
  24. Assets/StereoSet/.ipynb_checkpoints/stereo-set-gender-checkpoint.csv +11 -0
  25. Assets/StereoSet/.ipynb_checkpoints/stereo-set-race-checkpoint.csv +977 -0
  26. Assets/StereoSet/stereo-set-gender.csv +11 -0
  27. Assets/StereoSet/stereo-set-profession.csv +31 -0
  28. Assets/StereoSet/stereo-set-race.csv +37 -0
  29. Assets/StereoSet/stereo-set-religion.csv +4 -0
  30. Assets/VizNLC-Wireframe-example.png +0 -0
  31. Assets/VizNLC-wireframe.png +0 -0
  32. LICENSE +21 -0
  33. Lime Explorations.ipynb +0 -0
  34. NER-tweaks/.DS_Store +0 -0
  35. NER-tweaks/.ipynb_checkpoints/age-bias-checkpoint.jsonl +32 -0
  36. NER-tweaks/.ipynb_checkpoints/entity-ruler-input-checkpoint.jsonl +44 -0
  37. NER-tweaks/.ipynb_checkpoints/gender-test-checkpoint.jsonl +59 -0
  38. NER-tweaks/.ipynb_checkpoints/main-ruler-bias-checkpoint.jsonl +862 -0
  39. NER-tweaks/age-bias.jsonl +32 -0
  40. NER-tweaks/entity-ruler-input.jsonl +44 -0
  41. NER-tweaks/gender-test.jsonl +59 -0
  42. NER-tweaks/main-ruler-bias.jsonl +862 -0
  43. NLselector.py +197 -0
  44. Pipfile +40 -0
  45. Pipfile.lock +0 -0
  46. README OG.md +34 -0
  47. VizNLC-duct-tape-pipeline.ipynb +934 -0
  48. VizNLC-gen-pipeline.ipynb +1175 -0
  49. WNgen.py +313 -0
  50. app.py +340 -0
.DS_Store ADDED
Binary file (8.2 kB). View file
Assets/.DS_Store ADDED
Binary file (6.15 kB). View file
Assets/.ipynb_checkpoints/countries-checkpoint.csv ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Country,Continent
2
+ Algeria,Africa
3
+ Angola,Africa
4
+ Benin,Africa
5
+ Botswana,Africa
6
+ Burkina,Africa
7
+ Burundi,Africa
8
+ Cameroon,Africa
9
+ Cape Verde,Africa
10
+ Central African Republic,Africa
11
+ Chad,Africa
12
+ Comoros,Africa
13
+ Congo,Africa
14
+ "Congo, Democratic Republic of",Africa
15
+ Djibouti,Africa
16
+ Egypt,Africa
17
+ Equatorial Guinea,Africa
18
+ Eritrea,Africa
19
+ Ethiopia,Africa
20
+ Gabon,Africa
21
+ Gambia,Africa
22
+ Ghana,Africa
23
+ Guinea,Africa
24
+ Guinea-Bissau,Africa
25
+ Ivory Coast,Africa
26
+ Kenya,Africa
27
+ Lesotho,Africa
28
+ Liberia,Africa
29
+ Libya,Africa
30
+ Madagascar,Africa
31
+ Malawi,Africa
32
+ Mali,Africa
33
+ Mauritania,Africa
34
+ Mauritius,Africa
35
+ Morocco,Africa
36
+ Mozambique,Africa
37
+ Namibia,Africa
38
+ Niger,Africa
39
+ Nigeria,Africa
40
+ Rwanda,Africa
41
+ Sao Tome and Principe,Africa
42
+ Senegal,Africa
43
+ Seychelles,Africa
44
+ Sierra Leone,Africa
45
+ Somalia,Africa
46
+ South Africa,Africa
47
+ South Sudan,Africa
48
+ Sudan,Africa
49
+ Swaziland,Africa
50
+ Tanzania,Africa
51
+ Togo,Africa
52
+ Tunisia,Africa
53
+ Uganda,Africa
54
+ Zambia,Africa
55
+ Zimbabwe,Africa
56
+ Afghanistan,Asia
57
+ Bahrain,Asia
58
+ Bangladesh,Asia
59
+ Bhutan,Asia
60
+ Brunei,Asia
61
+ Burma (Myanmar),Asia
62
+ Cambodia,Asia
63
+ China,Asia
64
+ East Timor,Asia
65
+ India,Asia
66
+ Indonesia,Asia
67
+ Iran,Asia
68
+ Iraq,Asia
69
+ Israel,Asia
70
+ Japan,Asia
71
+ Jordan,Asia
72
+ Kazakhstan,Asia
73
+ "Korea, North",Asia
74
+ "Korea, South",Asia
75
+ Kuwait,Asia
76
+ Kyrgyzstan,Asia
77
+ Laos,Asia
78
+ Lebanon,Asia
79
+ Malaysia,Asia
80
+ Maldives,Asia
81
+ Mongolia,Asia
82
+ Nepal,Asia
83
+ Oman,Asia
84
+ Pakistan,Asia
85
+ Philippines,Asia
86
+ Qatar,Asia
87
+ Russian Federation,Asia
88
+ Saudi Arabia,Asia
89
+ Singapore,Asia
90
+ Sri Lanka,Asia
91
+ Syria,Asia
92
+ Tajikistan,Asia
93
+ Thailand,Asia
94
+ Turkey,Asia
95
+ Turkmenistan,Asia
96
+ United Arab Emirates,Asia
97
+ Uzbekistan,Asia
98
+ Vietnam,Asia
99
+ Yemen,Asia
100
+ Albania,Europe
101
+ Andorra,Europe
102
+ Armenia,Europe
103
+ Austria,Europe
104
+ Azerbaijan,Europe
105
+ Belarus,Europe
106
+ Belgium,Europe
107
+ Bosnia and Herzegovina,Europe
108
+ Bulgaria,Europe
109
+ Croatia,Europe
110
+ Cyprus,Europe
111
+ CZ,Europe
112
+ Denmark,Europe
113
+ Estonia,Europe
114
+ Finland,Europe
115
+ France,Europe
116
+ Georgia,Europe
117
+ Germany,Europe
118
+ Greece,Europe
119
+ Hungary,Europe
120
+ Iceland,Europe
121
+ Ireland,Europe
122
+ Italy,Europe
123
+ Latvia,Europe
124
+ Liechtenstein,Europe
125
+ Lithuania,Europe
126
+ Luxembourg,Europe
127
+ Macedonia,Europe
128
+ Malta,Europe
129
+ Moldova,Europe
130
+ Monaco,Europe
131
+ Montenegro,Europe
132
+ Netherlands,Europe
133
+ Norway,Europe
134
+ Poland,Europe
135
+ Portugal,Europe
136
+ Romania,Europe
137
+ San Marino,Europe
138
+ Serbia,Europe
139
+ Slovakia,Europe
140
+ Slovenia,Europe
141
+ Spain,Europe
142
+ Sweden,Europe
143
+ Switzerland,Europe
144
+ Ukraine,Europe
145
+ United Kingdom,Europe
146
+ Vatican City,Europe
147
+ Antigua and Barbuda,North America
148
+ Bahamas,North America
149
+ Barbados,North America
150
+ Belize,North America
151
+ Canada,North America
152
+ Costa Rica,North America
153
+ Cuba,North America
154
+ Dominica,North America
155
+ Dominican Republic,North America
156
+ El Salvador,North America
157
+ Grenada,North America
158
+ Guatemala,North America
159
+ Haiti,North America
160
+ Honduras,North America
161
+ Jamaica,North America
162
+ Mexico,North America
163
+ Nicaragua,North America
164
+ Panama,North America
165
+ Saint Kitts and Nevis,North America
166
+ Saint Lucia,North America
167
+ Saint Vincent and the Grenadines,North America
168
+ Trinidad and Tobago,North America
169
+ US,North America
170
+ Australia,Oceania
171
+ Fiji,Oceania
172
+ Kiribati,Oceania
173
+ Marshall Islands,Oceania
174
+ Micronesia,Oceania
175
+ Nauru,Oceania
176
+ New Zealand,Oceania
177
+ Palau,Oceania
178
+ Papua New Guinea,Oceania
179
+ Samoa,Oceania
180
+ Solomon Islands,Oceania
181
+ Tonga,Oceania
182
+ Tuvalu,Oceania
183
+ Vanuatu,Oceania
184
+ Argentina,South America
185
+ Bolivia,South America
186
+ Brazil,South America
187
+ Chile,South America
188
+ Colombia,South America
189
+ Ecuador,South America
190
+ Guyana,South America
191
+ Paraguay,South America
192
+ Peru,South America
193
+ Suriname,South America
194
+ Uruguay,South America
195
+ Venezuela,South America
Assets/Countries/.ipynb_checkpoints/Country-Data-Origin-checkpoint.md ADDED
@@ -0,0 +1,4 @@
 
 
 
 
1
+ # Origin of the country data used in this project
2
+
3
+ I started by getting a list of countries on GitHub, from [
4
+ Daina Bouquin](https://github.com/dbouquin/IS_608/blob/master/NanosatDB_munging/Countries-Continents.csv), because it seemed relatively complete and contained continents. Then I started to think about secondary data that might be useful for exposing the bias in an algorithm and opted for the [World Happiness Report 2021](https://worldhappiness.report/ed/2021/#appendices-and-data). I added the continents to the countries in that file to ensure I could retain the initial categorization I used.
Assets/Countries/.ipynb_checkpoints/clean-countries-checkpoint.ipynb ADDED
@@ -0,0 +1,2273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "daf46b53-319f-4973-9bb6-664135dd328e",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import pandas as pd, spacy, nltk, numpy as np, re, ssl"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 56,
16
+ "id": "3cae7a11-7696-40fc-967e-7ecafcb2b0da",
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "df = pd.read_excel(\"Assets/Countries/DataPanelWHR2021C2.xls\")"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": 57,
26
+ "id": "c1ebf3f3-1d38-4919-b60a-dc15e7bf907b",
27
+ "metadata": {},
28
+ "outputs": [
29
+ {
30
+ "data": {
31
+ "text/html": [
32
+ "<div>\n",
33
+ "<style scoped>\n",
34
+ " .dataframe tbody tr th:only-of-type {\n",
35
+ " vertical-align: middle;\n",
36
+ " }\n",
37
+ "\n",
38
+ " .dataframe tbody tr th {\n",
39
+ " vertical-align: top;\n",
40
+ " }\n",
41
+ "\n",
42
+ " .dataframe thead th {\n",
43
+ " text-align: right;\n",
44
+ " }\n",
45
+ "</style>\n",
46
+ "<table border=\"1\" class=\"dataframe\">\n",
47
+ " <thead>\n",
48
+ " <tr style=\"text-align: right;\">\n",
49
+ " <th></th>\n",
50
+ " <th>Country</th>\n",
51
+ " <th>year</th>\n",
52
+ " <th>Life Ladder</th>\n",
53
+ " <th>Log GDP per capita</th>\n",
54
+ " <th>Social support</th>\n",
55
+ " <th>Healthy life expectancy at birth</th>\n",
56
+ " <th>Freedom to make life choices</th>\n",
57
+ " <th>Generosity</th>\n",
58
+ " <th>Perceptions of corruption</th>\n",
59
+ " <th>Positive affect</th>\n",
60
+ " <th>Negative affect</th>\n",
61
+ " </tr>\n",
62
+ " </thead>\n",
63
+ " <tbody>\n",
64
+ " <tr>\n",
65
+ " <th>0</th>\n",
66
+ " <td>Afghanistan</td>\n",
67
+ " <td>2008</td>\n",
68
+ " <td>3.723590</td>\n",
69
+ " <td>7.370100</td>\n",
70
+ " <td>0.450662</td>\n",
71
+ " <td>50.799999</td>\n",
72
+ " <td>0.718114</td>\n",
73
+ " <td>0.167640</td>\n",
74
+ " <td>0.881686</td>\n",
75
+ " <td>0.517637</td>\n",
76
+ " <td>0.258195</td>\n",
77
+ " </tr>\n",
78
+ " <tr>\n",
79
+ " <th>1</th>\n",
80
+ " <td>Afghanistan</td>\n",
81
+ " <td>2009</td>\n",
82
+ " <td>4.401778</td>\n",
83
+ " <td>7.539972</td>\n",
84
+ " <td>0.552308</td>\n",
85
+ " <td>51.200001</td>\n",
86
+ " <td>0.678896</td>\n",
87
+ " <td>0.190099</td>\n",
88
+ " <td>0.850035</td>\n",
89
+ " <td>0.583926</td>\n",
90
+ " <td>0.237092</td>\n",
91
+ " </tr>\n",
92
+ " <tr>\n",
93
+ " <th>2</th>\n",
94
+ " <td>Afghanistan</td>\n",
95
+ " <td>2010</td>\n",
96
+ " <td>4.758381</td>\n",
97
+ " <td>7.646709</td>\n",
98
+ " <td>0.539075</td>\n",
99
+ " <td>51.599998</td>\n",
100
+ " <td>0.600127</td>\n",
101
+ " <td>0.120590</td>\n",
102
+ " <td>0.706766</td>\n",
103
+ " <td>0.618265</td>\n",
104
+ " <td>0.275324</td>\n",
105
+ " </tr>\n",
106
+ " <tr>\n",
107
+ " <th>3</th>\n",
108
+ " <td>Afghanistan</td>\n",
109
+ " <td>2011</td>\n",
110
+ " <td>3.831719</td>\n",
111
+ " <td>7.619532</td>\n",
112
+ " <td>0.521104</td>\n",
113
+ " <td>51.919998</td>\n",
114
+ " <td>0.495901</td>\n",
115
+ " <td>0.162427</td>\n",
116
+ " <td>0.731109</td>\n",
117
+ " <td>0.611387</td>\n",
118
+ " <td>0.267175</td>\n",
119
+ " </tr>\n",
120
+ " <tr>\n",
121
+ " <th>4</th>\n",
122
+ " <td>Afghanistan</td>\n",
123
+ " <td>2012</td>\n",
124
+ " <td>3.782938</td>\n",
125
+ " <td>7.705479</td>\n",
126
+ " <td>0.520637</td>\n",
127
+ " <td>52.240002</td>\n",
128
+ " <td>0.530935</td>\n",
129
+ " <td>0.236032</td>\n",
130
+ " <td>0.775620</td>\n",
131
+ " <td>0.710385</td>\n",
132
+ " <td>0.267919</td>\n",
133
+ " </tr>\n",
134
+ " </tbody>\n",
135
+ "</table>\n",
136
+ "</div>"
137
+ ],
138
+ "text/plain": [
139
+ " Country year Life Ladder Log GDP per capita Social support \\\n",
140
+ "0 Afghanistan 2008 3.723590 7.370100 0.450662 \n",
141
+ "1 Afghanistan 2009 4.401778 7.539972 0.552308 \n",
142
+ "2 Afghanistan 2010 4.758381 7.646709 0.539075 \n",
143
+ "3 Afghanistan 2011 3.831719 7.619532 0.521104 \n",
144
+ "4 Afghanistan 2012 3.782938 7.705479 0.520637 \n",
145
+ "\n",
146
+ " Healthy life expectancy at birth Freedom to make life choices Generosity \\\n",
147
+ "0 50.799999 0.718114 0.167640 \n",
148
+ "1 51.200001 0.678896 0.190099 \n",
149
+ "2 51.599998 0.600127 0.120590 \n",
150
+ "3 51.919998 0.495901 0.162427 \n",
151
+ "4 52.240002 0.530935 0.236032 \n",
152
+ "\n",
153
+ " Perceptions of corruption Positive affect Negative affect \n",
154
+ "0 0.881686 0.517637 0.258195 \n",
155
+ "1 0.850035 0.583926 0.237092 \n",
156
+ "2 0.706766 0.618265 0.275324 \n",
157
+ "3 0.731109 0.611387 0.267175 \n",
158
+ "4 0.775620 0.710385 0.267919 "
159
+ ]
160
+ },
161
+ "execution_count": 57,
162
+ "metadata": {},
163
+ "output_type": "execute_result"
164
+ }
165
+ ],
166
+ "source": [
167
+ "df.head()"
168
+ ]
169
+ },
170
+ {
171
+ "cell_type": "code",
172
+ "execution_count": 59,
173
+ "id": "a1d054e6-8ca7-4675-913e-b0b500afe105",
174
+ "metadata": {},
175
+ "outputs": [],
176
+ "source": [
177
+ "df_sorted = df.sort_values(by=['year'], ascending = False)"
178
+ ]
179
+ },
180
+ {
181
+ "cell_type": "code",
182
+ "execution_count": 60,
183
+ "id": "42d08d97-fa68-40dc-9cfd-b0aa8acbb838",
184
+ "metadata": {},
185
+ "outputs": [
186
+ {
187
+ "data": {
188
+ "text/html": [
189
+ "<div>\n",
190
+ "<style scoped>\n",
191
+ " .dataframe tbody tr th:only-of-type {\n",
192
+ " vertical-align: middle;\n",
193
+ " }\n",
194
+ "\n",
195
+ " .dataframe tbody tr th {\n",
196
+ " vertical-align: top;\n",
197
+ " }\n",
198
+ "\n",
199
+ " .dataframe thead th {\n",
200
+ " text-align: right;\n",
201
+ " }\n",
202
+ "</style>\n",
203
+ "<table border=\"1\" class=\"dataframe\">\n",
204
+ " <thead>\n",
205
+ " <tr style=\"text-align: right;\">\n",
206
+ " <th></th>\n",
207
+ " <th>Country</th>\n",
208
+ " <th>year</th>\n",
209
+ " <th>Life Ladder</th>\n",
210
+ " <th>Log GDP per capita</th>\n",
211
+ " <th>Social support</th>\n",
212
+ " <th>Healthy life expectancy at birth</th>\n",
213
+ " <th>Freedom to make life choices</th>\n",
214
+ " <th>Generosity</th>\n",
215
+ " <th>Perceptions of corruption</th>\n",
216
+ " <th>Positive affect</th>\n",
217
+ " <th>Negative affect</th>\n",
218
+ " </tr>\n",
219
+ " </thead>\n",
220
+ " <tbody>\n",
221
+ " <tr>\n",
222
+ " <th>1948</th>\n",
223
+ " <td>Zimbabwe</td>\n",
224
+ " <td>2020</td>\n",
225
+ " <td>3.159802</td>\n",
226
+ " <td>7.828757</td>\n",
227
+ " <td>0.717243</td>\n",
228
+ " <td>56.799999</td>\n",
229
+ " <td>0.643303</td>\n",
230
+ " <td>-0.008696</td>\n",
231
+ " <td>0.788523</td>\n",
232
+ " <td>0.702573</td>\n",
233
+ " <td>0.345736</td>\n",
234
+ " </tr>\n",
235
+ " <tr>\n",
236
+ " <th>174</th>\n",
237
+ " <td>Benin</td>\n",
238
+ " <td>2020</td>\n",
239
+ " <td>4.407746</td>\n",
240
+ " <td>8.102292</td>\n",
241
+ " <td>0.506636</td>\n",
242
+ " <td>55.099998</td>\n",
243
+ " <td>0.783115</td>\n",
244
+ " <td>-0.083489</td>\n",
245
+ " <td>0.531884</td>\n",
246
+ " <td>0.608585</td>\n",
247
+ " <td>0.304512</td>\n",
248
+ " </tr>\n",
249
+ " <tr>\n",
250
+ " <th>1835</th>\n",
251
+ " <td>United Kingdom</td>\n",
252
+ " <td>2020</td>\n",
253
+ " <td>6.798177</td>\n",
254
+ " <td>10.625811</td>\n",
255
+ " <td>0.929353</td>\n",
256
+ " <td>72.699997</td>\n",
257
+ " <td>0.884624</td>\n",
258
+ " <td>0.202508</td>\n",
259
+ " <td>0.490204</td>\n",
260
+ " <td>0.758164</td>\n",
261
+ " <td>0.224655</td>\n",
262
+ " </tr>\n",
263
+ " <tr>\n",
264
+ " <th>1394</th>\n",
265
+ " <td>Philippines</td>\n",
266
+ " <td>2020</td>\n",
267
+ " <td>5.079585</td>\n",
268
+ " <td>9.061443</td>\n",
269
+ " <td>0.781140</td>\n",
270
+ " <td>62.099998</td>\n",
271
+ " <td>0.932042</td>\n",
272
+ " <td>-0.115543</td>\n",
273
+ " <td>0.744284</td>\n",
274
+ " <td>0.803562</td>\n",
275
+ " <td>0.326889</td>\n",
276
+ " </tr>\n",
277
+ " <tr>\n",
278
+ " <th>785</th>\n",
279
+ " <td>Iraq</td>\n",
280
+ " <td>2020</td>\n",
281
+ " <td>4.785165</td>\n",
282
+ " <td>9.167186</td>\n",
283
+ " <td>0.707847</td>\n",
284
+ " <td>61.400002</td>\n",
285
+ " <td>0.700215</td>\n",
286
+ " <td>-0.020748</td>\n",
287
+ " <td>0.849109</td>\n",
288
+ " <td>0.644464</td>\n",
289
+ " <td>0.531539</td>\n",
290
+ " </tr>\n",
291
+ " </tbody>\n",
292
+ "</table>\n",
293
+ "</div>"
294
+ ],
295
+ "text/plain": [
296
+ " Country year Life Ladder Log GDP per capita Social support \\\n",
297
+ "1948 Zimbabwe 2020 3.159802 7.828757 0.717243 \n",
298
+ "174 Benin 2020 4.407746 8.102292 0.506636 \n",
299
+ "1835 United Kingdom 2020 6.798177 10.625811 0.929353 \n",
300
+ "1394 Philippines 2020 5.079585 9.061443 0.781140 \n",
301
+ "785 Iraq 2020 4.785165 9.167186 0.707847 \n",
302
+ "\n",
303
+ " Healthy life expectancy at birth Freedom to make life choices \\\n",
304
+ "1948 56.799999 0.643303 \n",
305
+ "174 55.099998 0.783115 \n",
306
+ "1835 72.699997 0.884624 \n",
307
+ "1394 62.099998 0.932042 \n",
308
+ "785 61.400002 0.700215 \n",
309
+ "\n",
310
+ " Generosity Perceptions of corruption Positive affect Negative affect \n",
311
+ "1948 -0.008696 0.788523 0.702573 0.345736 \n",
312
+ "174 -0.083489 0.531884 0.608585 0.304512 \n",
313
+ "1835 0.202508 0.490204 0.758164 0.224655 \n",
314
+ "1394 -0.115543 0.744284 0.803562 0.326889 \n",
315
+ "785 -0.020748 0.849109 0.644464 0.531539 "
316
+ ]
317
+ },
318
+ "execution_count": 60,
319
+ "metadata": {},
320
+ "output_type": "execute_result"
321
+ }
322
+ ],
323
+ "source": [
324
+ "df_sorted.head()"
325
+ ]
326
+ },
327
+ {
328
+ "cell_type": "code",
329
+ "execution_count": 61,
330
+ "id": "abb8954c-106f-42d1-bf2a-0200b8927306",
331
+ "metadata": {},
332
+ "outputs": [],
333
+ "source": [
334
+ "df_dedup = df_sorted.drop_duplicates(subset=['Country'])"
335
+ ]
336
+ },
337
+ {
338
+ "cell_type": "code",
339
+ "execution_count": 62,
340
+ "id": "969f5fcf-5dc6-4ce3-93f7-0f35473f3c73",
341
+ "metadata": {},
342
+ "outputs": [
343
+ {
344
+ "data": {
345
+ "text/html": [
346
+ "<div>\n",
347
+ "<style scoped>\n",
348
+ " .dataframe tbody tr th:only-of-type {\n",
349
+ " vertical-align: middle;\n",
350
+ " }\n",
351
+ "\n",
352
+ " .dataframe tbody tr th {\n",
353
+ " vertical-align: top;\n",
354
+ " }\n",
355
+ "\n",
356
+ " .dataframe thead th {\n",
357
+ " text-align: right;\n",
358
+ " }\n",
359
+ "</style>\n",
360
+ "<table border=\"1\" class=\"dataframe\">\n",
361
+ " <thead>\n",
362
+ " <tr style=\"text-align: right;\">\n",
363
+ " <th></th>\n",
364
+ " <th>Country</th>\n",
365
+ " <th>year</th>\n",
366
+ " <th>Life Ladder</th>\n",
367
+ " <th>Log GDP per capita</th>\n",
368
+ " <th>Social support</th>\n",
369
+ " <th>Healthy life expectancy at birth</th>\n",
370
+ " <th>Freedom to make life choices</th>\n",
371
+ " <th>Generosity</th>\n",
372
+ " <th>Perceptions of corruption</th>\n",
373
+ " <th>Positive affect</th>\n",
374
+ " <th>Negative affect</th>\n",
375
+ " </tr>\n",
376
+ " </thead>\n",
377
+ " <tbody>\n",
378
+ " <tr>\n",
379
+ " <th>1948</th>\n",
380
+ " <td>Zimbabwe</td>\n",
381
+ " <td>2020</td>\n",
382
+ " <td>3.159802</td>\n",
383
+ " <td>7.828757</td>\n",
384
+ " <td>0.717243</td>\n",
385
+ " <td>56.799999</td>\n",
386
+ " <td>0.643303</td>\n",
387
+ " <td>-0.008696</td>\n",
388
+ " <td>0.788523</td>\n",
389
+ " <td>0.702573</td>\n",
390
+ " <td>0.345736</td>\n",
391
+ " </tr>\n",
392
+ " <tr>\n",
393
+ " <th>174</th>\n",
394
+ " <td>Benin</td>\n",
395
+ " <td>2020</td>\n",
396
+ " <td>4.407746</td>\n",
397
+ " <td>8.102292</td>\n",
398
+ " <td>0.506636</td>\n",
399
+ " <td>55.099998</td>\n",
400
+ " <td>0.783115</td>\n",
401
+ " <td>-0.083489</td>\n",
402
+ " <td>0.531884</td>\n",
403
+ " <td>0.608585</td>\n",
404
+ " <td>0.304512</td>\n",
405
+ " </tr>\n",
406
+ " <tr>\n",
407
+ " <th>1835</th>\n",
408
+ " <td>United Kingdom</td>\n",
409
+ " <td>2020</td>\n",
410
+ " <td>6.798177</td>\n",
411
+ " <td>10.625811</td>\n",
412
+ " <td>0.929353</td>\n",
413
+ " <td>72.699997</td>\n",
414
+ " <td>0.884624</td>\n",
415
+ " <td>0.202508</td>\n",
416
+ " <td>0.490204</td>\n",
417
+ " <td>0.758164</td>\n",
418
+ " <td>0.224655</td>\n",
419
+ " </tr>\n",
420
+ " <tr>\n",
421
+ " <th>1394</th>\n",
422
+ " <td>Philippines</td>\n",
423
+ " <td>2020</td>\n",
424
+ " <td>5.079585</td>\n",
425
+ " <td>9.061443</td>\n",
426
+ " <td>0.781140</td>\n",
427
+ " <td>62.099998</td>\n",
428
+ " <td>0.932042</td>\n",
429
+ " <td>-0.115543</td>\n",
430
+ " <td>0.744284</td>\n",
431
+ " <td>0.803562</td>\n",
432
+ " <td>0.326889</td>\n",
433
+ " </tr>\n",
434
+ " <tr>\n",
435
+ " <th>785</th>\n",
436
+ " <td>Iraq</td>\n",
437
+ " <td>2020</td>\n",
438
+ " <td>4.785165</td>\n",
439
+ " <td>9.167186</td>\n",
440
+ " <td>0.707847</td>\n",
441
+ " <td>61.400002</td>\n",
442
+ " <td>0.700215</td>\n",
443
+ " <td>-0.020748</td>\n",
444
+ " <td>0.849109</td>\n",
445
+ " <td>0.644464</td>\n",
446
+ " <td>0.531539</td>\n",
447
+ " </tr>\n",
448
+ " </tbody>\n",
449
+ "</table>\n",
450
+ "</div>"
451
+ ],
452
+ "text/plain": [
453
+ " Country year Life Ladder Log GDP per capita Social support \\\n",
454
+ "1948 Zimbabwe 2020 3.159802 7.828757 0.717243 \n",
455
+ "174 Benin 2020 4.407746 8.102292 0.506636 \n",
456
+ "1835 United Kingdom 2020 6.798177 10.625811 0.929353 \n",
457
+ "1394 Philippines 2020 5.079585 9.061443 0.781140 \n",
458
+ "785 Iraq 2020 4.785165 9.167186 0.707847 \n",
459
+ "\n",
460
+ " Healthy life expectancy at birth Freedom to make life choices \\\n",
461
+ "1948 56.799999 0.643303 \n",
462
+ "174 55.099998 0.783115 \n",
463
+ "1835 72.699997 0.884624 \n",
464
+ "1394 62.099998 0.932042 \n",
465
+ "785 61.400002 0.700215 \n",
466
+ "\n",
467
+ " Generosity Perceptions of corruption Positive affect Negative affect \n",
468
+ "1948 -0.008696 0.788523 0.702573 0.345736 \n",
469
+ "174 -0.083489 0.531884 0.608585 0.304512 \n",
470
+ "1835 0.202508 0.490204 0.758164 0.224655 \n",
471
+ "1394 -0.115543 0.744284 0.803562 0.326889 \n",
472
+ "785 -0.020748 0.849109 0.644464 0.531539 "
473
+ ]
474
+ },
475
+ "execution_count": 62,
476
+ "metadata": {},
477
+ "output_type": "execute_result"
478
+ }
479
+ ],
480
+ "source": [
481
+ "df_dedup.head()"
482
+ ]
483
+ },
484
+ {
485
+ "cell_type": "code",
486
+ "execution_count": 63,
487
+ "id": "d080546c-4698-4edd-8b76-e3c94aee9862",
488
+ "metadata": {},
489
+ "outputs": [
490
+ {
491
+ "data": {
492
+ "text/plain": [
493
+ "1949"
494
+ ]
495
+ },
496
+ "execution_count": 63,
497
+ "metadata": {},
498
+ "output_type": "execute_result"
499
+ }
500
+ ],
501
+ "source": [
502
+ "len(df_sorted)"
503
+ ]
504
+ },
505
+ {
506
+ "cell_type": "code",
507
+ "execution_count": 64,
508
+ "id": "6a817f5c-e871-4d69-9368-00a90efc6007",
509
+ "metadata": {},
510
+ "outputs": [
511
+ {
512
+ "data": {
513
+ "text/plain": [
514
+ "166"
515
+ ]
516
+ },
517
+ "execution_count": 64,
518
+ "metadata": {},
519
+ "output_type": "execute_result"
520
+ }
521
+ ],
522
+ "source": [
523
+ "len(df_dedup)"
524
+ ]
525
+ },
526
+ {
527
+ "cell_type": "code",
528
+ "execution_count": 65,
529
+ "id": "d6640a42-064e-4b31-b89d-de4f7d4240a3",
530
+ "metadata": {},
531
+ "outputs": [
532
+ {
533
+ "data": {
534
+ "text/html": [
535
+ "<div>\n",
536
+ "<style scoped>\n",
537
+ " .dataframe tbody tr th:only-of-type {\n",
538
+ " vertical-align: middle;\n",
539
+ " }\n",
540
+ "\n",
541
+ " .dataframe tbody tr th {\n",
542
+ " vertical-align: top;\n",
543
+ " }\n",
544
+ "\n",
545
+ " .dataframe thead th {\n",
546
+ " text-align: right;\n",
547
+ " }\n",
548
+ "</style>\n",
549
+ "<table border=\"1\" class=\"dataframe\">\n",
550
+ " <thead>\n",
551
+ " <tr style=\"text-align: right;\">\n",
552
+ " <th></th>\n",
553
+ " <th>Country</th>\n",
554
+ " <th>Continent</th>\n",
555
+ " </tr>\n",
556
+ " </thead>\n",
557
+ " <tbody>\n",
558
+ " <tr>\n",
559
+ " <th>0</th>\n",
560
+ " <td>Algeria</td>\n",
561
+ " <td>Africa</td>\n",
562
+ " </tr>\n",
563
+ " <tr>\n",
564
+ " <th>1</th>\n",
565
+ " <td>Angola</td>\n",
566
+ " <td>Africa</td>\n",
567
+ " </tr>\n",
568
+ " <tr>\n",
569
+ " <th>2</th>\n",
570
+ " <td>Benin</td>\n",
571
+ " <td>Africa</td>\n",
572
+ " </tr>\n",
573
+ " <tr>\n",
574
+ " <th>3</th>\n",
575
+ " <td>Botswana</td>\n",
576
+ " <td>Africa</td>\n",
577
+ " </tr>\n",
578
+ " <tr>\n",
579
+ " <th>4</th>\n",
580
+ " <td>Burkina</td>\n",
581
+ " <td>Africa</td>\n",
582
+ " </tr>\n",
583
+ " </tbody>\n",
584
+ "</table>\n",
585
+ "</div>"
586
+ ],
587
+ "text/plain": [
588
+ " Country Continent\n",
589
+ "0 Algeria Africa\n",
590
+ "1 Angola Africa\n",
591
+ "2 Benin Africa\n",
592
+ "3 Botswana Africa\n",
593
+ "4 Burkina Africa"
594
+ ]
595
+ },
596
+ "execution_count": 65,
597
+ "metadata": {},
598
+ "output_type": "execute_result"
599
+ }
600
+ ],
601
+ "source": [
602
+ "df_csv = pd.read_csv(\"Assets/Countries/countries.csv\")\n",
603
+ "df_csv.head()"
604
+ ]
605
+ },
606
+ {
607
+ "cell_type": "code",
608
+ "execution_count": 18,
609
+ "id": "a6e6f52e-cff7-4d78-b630-e71e07fa8842",
610
+ "metadata": {},
611
+ "outputs": [
612
+ {
613
+ "data": {
614
+ "text/plain": [
615
+ "194"
616
+ ]
617
+ },
618
+ "execution_count": 18,
619
+ "metadata": {},
620
+ "output_type": "execute_result"
621
+ }
622
+ ],
623
+ "source": [
624
+ "len(df_csv)"
625
+ ]
626
+ },
627
+ {
628
+ "cell_type": "code",
629
+ "execution_count": 66,
630
+ "id": "edaae740-75bf-42a2-afa6-ebbbbf50d792",
631
+ "metadata": {},
632
+ "outputs": [],
633
+ "source": [
634
+ "c1 = df_dedup[\"Country\"]\n",
635
+ "c2 = list(df_csv[\"Country\"])\n",
636
+ "c3 = [(country, country in c2) for country in c1]"
637
+ ]
638
+ },
639
+ {
640
+ "cell_type": "code",
641
+ "execution_count": 67,
642
+ "id": "5e86b02e-e5a3-4eaf-b045-74f0d0cfea08",
643
+ "metadata": {},
644
+ "outputs": [
645
+ {
646
+ "data": {
647
+ "text/plain": [
648
+ "True"
649
+ ]
650
+ },
651
+ "execution_count": 67,
652
+ "metadata": {},
653
+ "output_type": "execute_result"
654
+ }
655
+ ],
656
+ "source": [
657
+ "\"Zimbabwe\" in c2"
658
+ ]
659
+ },
660
+ {
661
+ "cell_type": "code",
662
+ "execution_count": 68,
663
+ "id": "921765a7-6f40-4d6a-9403-f5f8d8f26a65",
664
+ "metadata": {},
665
+ "outputs": [
666
+ {
667
+ "data": {
668
+ "text/plain": [
669
+ "[('Zimbabwe', True),\n",
670
+ " ('Benin', True),\n",
671
+ " ('United Kingdom', True),\n",
672
+ " ('Philippines', True),\n",
673
+ " ('Iraq', True),\n",
674
+ " ('Belgium', True),\n",
675
+ " ('Iran', True),\n",
676
+ " ('Poland', True),\n",
677
+ " ('Portugal', True),\n",
678
+ " ('India', True),\n",
679
+ " ('Israel', True),\n",
680
+ " ('Iceland', True),\n",
681
+ " ('United Arab Emirates', True),\n",
682
+ " ('Hungary', True),\n",
683
+ " ('Hong Kong S.A.R. of China', False),\n",
684
+ " ('Bolivia', True),\n",
685
+ " ('Russia', False),\n",
686
+ " ('Saudi Arabia', True),\n",
687
+ " ('Ireland', True),\n",
688
+ " ('Italy', True),\n",
689
+ " ('Ukraine', True),\n",
690
+ " ('Kenya', True),\n",
691
+ " ('Latvia', True),\n",
692
+ " ('Laos', True),\n",
693
+ " ('Nigeria', True),\n",
694
+ " ('Austria', True),\n",
695
+ " ('Kyrgyzstan', True),\n",
696
+ " ('North Macedonia', False),\n",
697
+ " ('Kosovo', False),\n",
698
+ " ('Norway', True),\n",
699
+ " ('United States', False),\n",
700
+ " ('Kazakhstan', True),\n",
701
+ " ('Bahrain', True),\n",
702
+ " ('Uruguay', True),\n",
703
+ " ('Jordan', True),\n",
704
+ " ('Japan', True),\n",
705
+ " ('Bangladesh', True),\n",
706
+ " ('Ivory Coast', True),\n",
707
+ " ('Bosnia and Herzegovina', True),\n",
708
+ " ('Greece', True),\n",
709
+ " ('Australia', True),\n",
710
+ " ('Croatia', True),\n",
711
+ " ('Tunisia', True),\n",
712
+ " ('Spain', True),\n",
713
+ " ('Denmark', True),\n",
714
+ " ('Cameroon', True),\n",
715
+ " ('Czech Republic', False),\n",
716
+ " ('Cyprus', True),\n",
717
+ " ('Sweden', True),\n",
718
+ " ('Canada', True),\n",
719
+ " ('South Korea', False),\n",
720
+ " ('Switzerland', True),\n",
721
+ " ('Thailand', True),\n",
722
+ " ('Taiwan Province of China', False),\n",
723
+ " ('Colombia', True),\n",
724
+ " ('Tajikistan', True),\n",
725
+ " ('Tanzania', True),\n",
726
+ " ('China', True),\n",
727
+ " ('Dominican Republic', True),\n",
728
+ " ('Cambodia', True),\n",
729
+ " ('Ghana', True),\n",
730
+ " ('Slovakia', True),\n",
731
+ " ('Serbia', True),\n",
732
+ " ('Uganda', True),\n",
733
+ " ('Germany', True),\n",
734
+ " ('Georgia', True),\n",
735
+ " ('Brazil', True),\n",
736
+ " ('France', True),\n",
737
+ " ('Bulgaria', True),\n",
738
+ " ('Finland', True),\n",
739
+ " ('Ecuador', True),\n",
740
+ " ('Ethiopia', True),\n",
741
+ " ('Slovenia', True),\n",
742
+ " ('Estonia', True),\n",
743
+ " ('El Salvador', True),\n",
744
+ " ('Turkey', True),\n",
745
+ " ('South Africa', True),\n",
746
+ " ('Egypt', True),\n",
747
+ " ('Venezuela', True),\n",
748
+ " ('Chile', True),\n",
749
+ " ('Lithuania', True),\n",
750
+ " ('Moldova', True),\n",
751
+ " ('Netherlands', True),\n",
752
+ " ('Mongolia', True),\n",
753
+ " ('Mauritius', True),\n",
754
+ " ('Mexico', True),\n",
755
+ " ('New Zealand', True),\n",
756
+ " ('Namibia', True),\n",
757
+ " ('Myanmar', False),\n",
758
+ " ('Malta', True),\n",
759
+ " ('Zambia', True),\n",
760
+ " ('Argentina', True),\n",
761
+ " ('Morocco', True),\n",
762
+ " ('Albania', True),\n",
763
+ " ('Montenegro', True),\n",
764
+ " ('Guinea', True),\n",
765
+ " ('Yemen', True),\n",
766
+ " ('Guatemala', True),\n",
767
+ " ('Malaysia', True),\n",
768
+ " ('Rwanda', True),\n",
769
+ " ('Sri Lanka', True),\n",
770
+ " ('Malawi', True),\n",
771
+ " ('Nepal', True),\n",
772
+ " ('Swaziland', True),\n",
773
+ " ('Romania', True),\n",
774
+ " ('Senegal', True),\n",
775
+ " ('Honduras', True),\n",
776
+ " ('Mali', True),\n",
777
+ " ('Mauritania', True),\n",
778
+ " ('Turkmenistan', True),\n",
779
+ " ('Burkina Faso', False),\n",
780
+ " ('Algeria', True),\n",
781
+ " ('Botswana', True),\n",
782
+ " ('Sierra Leone', True),\n",
783
+ " ('Mozambique', True),\n",
784
+ " ('Singapore', True),\n",
785
+ " ('Gambia', True),\n",
786
+ " ('Gabon', True),\n",
787
+ " ('Indonesia', True),\n",
788
+ " ('Azerbaijan', True),\n",
789
+ " ('Chad', True),\n",
790
+ " ('Liberia', True),\n",
791
+ " ('Libya', True),\n",
792
+ " ('Pakistan', True),\n",
793
+ " ('Armenia', True),\n",
794
+ " ('Comoros', True),\n",
795
+ " ('Afghanistan', True),\n",
796
+ " ('Palestinian Territories', False),\n",
797
+ " ('Nicaragua', True),\n",
798
+ " ('Niger', True),\n",
799
+ " ('Lebanon', True),\n",
800
+ " ('Lesotho', True),\n",
801
+ " ('Uzbekistan', True),\n",
802
+ " ('North Cyprus', False),\n",
803
+ " ('Kuwait', True),\n",
804
+ " ('Congo (Brazzaville)', False),\n",
805
+ " ('Peru', True),\n",
806
+ " ('Vietnam', True),\n",
807
+ " ('Togo', True),\n",
808
+ " ('Belarus', True),\n",
809
+ " ('Madagascar', True),\n",
810
+ " ('Costa Rica', True),\n",
811
+ " ('Luxembourg', True),\n",
812
+ " ('Panama', True),\n",
813
+ " ('Paraguay', True),\n",
814
+ " ('Jamaica', True),\n",
815
+ " ('Maldives', True),\n",
816
+ " ('Haiti', True),\n",
817
+ " ('Burundi', True),\n",
818
+ " ('Congo (Kinshasa)', False),\n",
819
+ " ('Central African Republic', True),\n",
820
+ " ('Trinidad and Tobago', True),\n",
821
+ " ('South Sudan', True),\n",
822
+ " ('Somalia', True),\n",
823
+ " ('Syria', True),\n",
824
+ " ('Qatar', True),\n",
825
+ " ('Bhutan', True),\n",
826
+ " ('Sudan', True),\n",
827
+ " ('Angola', True),\n",
828
+ " ('Belize', True),\n",
829
+ " ('Suriname', True),\n",
830
+ " ('Somaliland region', False),\n",
831
+ " ('Oman', True),\n",
832
+ " ('Djibouti', True),\n",
833
+ " ('Guyana', True),\n",
834
+ " ('Cuba', True)]"
835
+ ]
836
+ },
837
+ "execution_count": 68,
838
+ "metadata": {},
839
+ "output_type": "execute_result"
840
+ }
841
+ ],
842
+ "source": [
843
+ "c3"
844
+ ]
845
+ },
846
+ {
847
+ "cell_type": "code",
848
+ "execution_count": 37,
849
+ "id": "ff74b057-7281-4ab2-82c5-367e949fbbed",
850
+ "metadata": {},
851
+ "outputs": [
852
+ {
853
+ "data": {
854
+ "text/plain": [
855
+ "['Hong Kong S.A.R. of China',\n",
856
+ " 'Russia',\n",
857
+ " 'North Macedonia',\n",
858
+ " 'Kosovo',\n",
859
+ " 'United States',\n",
860
+ " 'Czech Republic',\n",
861
+ " 'South Korea',\n",
862
+ " 'Taiwan Province of China',\n",
863
+ " 'Myanmar',\n",
864
+ " 'Burkina Faso',\n",
865
+ " 'Palestinian Territories',\n",
866
+ " 'North Cyprus',\n",
867
+ " 'Congo (Brazzaville)',\n",
868
+ " 'Congo (Kinshasa)',\n",
869
+ " 'Somaliland region']"
870
+ ]
871
+ },
872
+ "execution_count": 37,
873
+ "metadata": {},
874
+ "output_type": "execute_result"
875
+ }
876
+ ],
877
+ "source": [
878
+ "num = 0\n",
879
+ "missing = []\n",
880
+ "for pair in c3:\n",
881
+ " if pair[1]:\n",
882
+ " num +=1\n",
883
+ " else:\n",
884
+ " missing.append(pair[0]) \n",
885
+ "num\n",
886
+ "missing"
887
+ ]
888
+ },
889
+ {
890
+ "cell_type": "code",
891
+ "execution_count": 44,
892
+ "id": "50f20260-3ed6-4f4e-a558-e3c6374ecb26",
893
+ "metadata": {},
894
+ "outputs": [
895
+ {
896
+ "data": {
897
+ "text/plain": [
898
+ "'Africa'"
899
+ ]
900
+ },
901
+ "execution_count": 44,
902
+ "metadata": {},
903
+ "output_type": "execute_result"
904
+ }
905
+ ],
906
+ "source": [
907
+ "df_csv.loc[df_csv['Country'] == \"Madagascar\", 'Continent'].iloc[0]"
908
+ ]
909
+ },
910
+ {
911
+ "cell_type": "code",
912
+ "execution_count": 50,
913
+ "id": "9dfa66ef-1c2b-4893-8993-107c2e02a2c8",
914
+ "metadata": {},
915
+ "outputs": [
916
+ {
917
+ "data": {
918
+ "text/html": [
919
+ "<div>\n",
920
+ "<style scoped>\n",
921
+ " .dataframe tbody tr th:only-of-type {\n",
922
+ " vertical-align: middle;\n",
923
+ " }\n",
924
+ "\n",
925
+ " .dataframe tbody tr th {\n",
926
+ " vertical-align: top;\n",
927
+ " }\n",
928
+ "\n",
929
+ " .dataframe thead th {\n",
930
+ " text-align: right;\n",
931
+ " }\n",
932
+ "</style>\n",
933
+ "<table border=\"1\" class=\"dataframe\">\n",
934
+ " <thead>\n",
935
+ " <tr style=\"text-align: right;\">\n",
936
+ " <th></th>\n",
937
+ " <th>Country name</th>\n",
938
+ " <th>year</th>\n",
939
+ " <th>Life Ladder</th>\n",
940
+ " <th>Log GDP per capita</th>\n",
941
+ " <th>Social support</th>\n",
942
+ " <th>Healthy life expectancy at birth</th>\n",
943
+ " <th>Freedom to make life choices</th>\n",
944
+ " <th>Generosity</th>\n",
945
+ " <th>Perceptions of corruption</th>\n",
946
+ " <th>Positive affect</th>\n",
947
+ " <th>Negative affect</th>\n",
948
+ " <th>Continent</th>\n",
949
+ " </tr>\n",
950
+ " </thead>\n",
951
+ " <tbody>\n",
952
+ " <tr>\n",
953
+ " <th>1948</th>\n",
954
+ " <td>Zimbabwe</td>\n",
955
+ " <td>2020</td>\n",
956
+ " <td>3.159802</td>\n",
957
+ " <td>7.828757</td>\n",
958
+ " <td>0.717243</td>\n",
959
+ " <td>56.799999</td>\n",
960
+ " <td>0.643303</td>\n",
961
+ " <td>-0.008696</td>\n",
962
+ " <td>0.788523</td>\n",
963
+ " <td>0.702573</td>\n",
964
+ " <td>0.345736</td>\n",
965
+ " <td>&lt;pandas.core.indexing._iLocIndexer object at 0...</td>\n",
966
+ " </tr>\n",
967
+ " <tr>\n",
968
+ " <th>174</th>\n",
969
+ " <td>Benin</td>\n",
970
+ " <td>2020</td>\n",
971
+ " <td>4.407746</td>\n",
972
+ " <td>8.102292</td>\n",
973
+ " <td>0.506636</td>\n",
974
+ " <td>55.099998</td>\n",
975
+ " <td>0.783115</td>\n",
976
+ " <td>-0.083489</td>\n",
977
+ " <td>0.531884</td>\n",
978
+ " <td>0.608585</td>\n",
979
+ " <td>0.304512</td>\n",
980
+ " <td>&lt;pandas.core.indexing._iLocIndexer object at 0...</td>\n",
981
+ " </tr>\n",
982
+ " <tr>\n",
983
+ " <th>1835</th>\n",
984
+ " <td>United Kingdom</td>\n",
985
+ " <td>2020</td>\n",
986
+ " <td>6.798177</td>\n",
987
+ " <td>10.625811</td>\n",
988
+ " <td>0.929353</td>\n",
989
+ " <td>72.699997</td>\n",
990
+ " <td>0.884624</td>\n",
991
+ " <td>0.202508</td>\n",
992
+ " <td>0.490204</td>\n",
993
+ " <td>0.758164</td>\n",
994
+ " <td>0.224655</td>\n",
995
+ " <td>&lt;pandas.core.indexing._iLocIndexer object at 0...</td>\n",
996
+ " </tr>\n",
997
+ " <tr>\n",
998
+ " <th>1394</th>\n",
999
+ " <td>Philippines</td>\n",
1000
+ " <td>2020</td>\n",
1001
+ " <td>5.079585</td>\n",
1002
+ " <td>9.061443</td>\n",
1003
+ " <td>0.781140</td>\n",
1004
+ " <td>62.099998</td>\n",
1005
+ " <td>0.932042</td>\n",
1006
+ " <td>-0.115543</td>\n",
1007
+ " <td>0.744284</td>\n",
1008
+ " <td>0.803562</td>\n",
1009
+ " <td>0.326889</td>\n",
1010
+ " <td>&lt;pandas.core.indexing._iLocIndexer object at 0...</td>\n",
1011
+ " </tr>\n",
1012
+ " <tr>\n",
1013
+ " <th>785</th>\n",
1014
+ " <td>Iraq</td>\n",
1015
+ " <td>2020</td>\n",
1016
+ " <td>4.785165</td>\n",
1017
+ " <td>9.167186</td>\n",
1018
+ " <td>0.707847</td>\n",
1019
+ " <td>61.400002</td>\n",
1020
+ " <td>0.700215</td>\n",
1021
+ " <td>-0.020748</td>\n",
1022
+ " <td>0.849109</td>\n",
1023
+ " <td>0.644464</td>\n",
1024
+ " <td>0.531539</td>\n",
1025
+ " <td>&lt;pandas.core.indexing._iLocIndexer object at 0...</td>\n",
1026
+ " </tr>\n",
1027
+ " </tbody>\n",
1028
+ "</table>\n",
1029
+ "</div>"
1030
+ ],
1031
+ "text/plain": [
1032
+ " Country name year Life Ladder Log GDP per capita Social support \\\n",
1033
+ "1948 Zimbabwe 2020 3.159802 7.828757 0.717243 \n",
1034
+ "174 Benin 2020 4.407746 8.102292 0.506636 \n",
1035
+ "1835 United Kingdom 2020 6.798177 10.625811 0.929353 \n",
1036
+ "1394 Philippines 2020 5.079585 9.061443 0.781140 \n",
1037
+ "785 Iraq 2020 4.785165 9.167186 0.707847 \n",
1038
+ "\n",
1039
+ " Healthy life expectancy at birth Freedom to make life choices \\\n",
1040
+ "1948 56.799999 0.643303 \n",
1041
+ "174 55.099998 0.783115 \n",
1042
+ "1835 72.699997 0.884624 \n",
1043
+ "1394 62.099998 0.932042 \n",
1044
+ "785 61.400002 0.700215 \n",
1045
+ "\n",
1046
+ " Generosity Perceptions of corruption Positive affect Negative affect \\\n",
1047
+ "1948 -0.008696 0.788523 0.702573 0.345736 \n",
1048
+ "174 -0.083489 0.531884 0.608585 0.304512 \n",
1049
+ "1835 0.202508 0.490204 0.758164 0.224655 \n",
1050
+ "1394 -0.115543 0.744284 0.803562 0.326889 \n",
1051
+ "785 -0.020748 0.849109 0.644464 0.531539 \n",
1052
+ "\n",
1053
+ " Continent \n",
1054
+ "1948 <pandas.core.indexing._iLocIndexer object at 0... \n",
1055
+ "174 <pandas.core.indexing._iLocIndexer object at 0... \n",
1056
+ "1835 <pandas.core.indexing._iLocIndexer object at 0... \n",
1057
+ "1394 <pandas.core.indexing._iLocIndexer object at 0... \n",
1058
+ "785 <pandas.core.indexing._iLocIndexer object at 0... "
1059
+ ]
1060
+ },
1061
+ "execution_count": 50,
1062
+ "metadata": {},
1063
+ "output_type": "execute_result"
1064
+ }
1065
+ ],
1066
+ "source": [
1067
+ "df_dedup.head()"
1068
+ ]
1069
+ },
1070
+ {
1071
+ "cell_type": "code",
1072
+ "execution_count": 74,
1073
+ "id": "b1fcd392-abfb-42a8-8485-f3fbd6a155d1",
1074
+ "metadata": {},
1075
+ "outputs": [],
1076
+ "source": [
1077
+ "df_cont = df_dedup.set_index('Country').join(df_csv.set_index('Country'), on='Country', how='left')"
1078
+ ]
1079
+ },
1080
+ {
1081
+ "cell_type": "code",
1082
+ "execution_count": 77,
1083
+ "id": "55ec121c-534e-4e25-88e9-5ab8267fd66b",
1084
+ "metadata": {},
1085
+ "outputs": [],
1086
+ "source": [
1087
+ "df_cont = df_cont.reset_index()"
1088
+ ]
1089
+ },
1090
+ {
1091
+ "cell_type": "code",
1092
+ "execution_count": 78,
1093
+ "id": "8ddaf798-772d-489d-b2fc-32d4cd76ae50",
1094
+ "metadata": {},
1095
+ "outputs": [
1096
+ {
1097
+ "data": {
1098
+ "text/plain": [
1099
+ "166"
1100
+ ]
1101
+ },
1102
+ "execution_count": 78,
1103
+ "metadata": {},
1104
+ "output_type": "execute_result"
1105
+ }
1106
+ ],
1107
+ "source": [
1108
+ "len(df_cont)"
1109
+ ]
1110
+ },
1111
+ {
1112
+ "cell_type": "code",
1113
+ "execution_count": 79,
1114
+ "id": "7420265a-e079-443c-9be0-01becf73a836",
1115
+ "metadata": {},
1116
+ "outputs": [
1117
+ {
1118
+ "data": {
1119
+ "text/html": [
1120
+ "<div>\n",
1121
+ "<style scoped>\n",
1122
+ " .dataframe tbody tr th:only-of-type {\n",
1123
+ " vertical-align: middle;\n",
1124
+ " }\n",
1125
+ "\n",
1126
+ " .dataframe tbody tr th {\n",
1127
+ " vertical-align: top;\n",
1128
+ " }\n",
1129
+ "\n",
1130
+ " .dataframe thead th {\n",
1131
+ " text-align: right;\n",
1132
+ " }\n",
1133
+ "</style>\n",
1134
+ "<table border=\"1\" class=\"dataframe\">\n",
1135
+ " <thead>\n",
1136
+ " <tr style=\"text-align: right;\">\n",
1137
+ " <th></th>\n",
1138
+ " <th>Country</th>\n",
1139
+ " <th>year</th>\n",
1140
+ " <th>Life Ladder</th>\n",
1141
+ " <th>Log GDP per capita</th>\n",
1142
+ " <th>Social support</th>\n",
1143
+ " <th>Healthy life expectancy at birth</th>\n",
1144
+ " <th>Freedom to make life choices</th>\n",
1145
+ " <th>Generosity</th>\n",
1146
+ " <th>Perceptions of corruption</th>\n",
1147
+ " <th>Positive affect</th>\n",
1148
+ " <th>Negative affect</th>\n",
1149
+ " <th>Continent</th>\n",
1150
+ " </tr>\n",
1151
+ " </thead>\n",
1152
+ " <tbody>\n",
1153
+ " <tr>\n",
1154
+ " <th>0</th>\n",
1155
+ " <td>Zimbabwe</td>\n",
1156
+ " <td>2020</td>\n",
1157
+ " <td>3.159802</td>\n",
1158
+ " <td>7.828757</td>\n",
1159
+ " <td>0.717243</td>\n",
1160
+ " <td>56.799999</td>\n",
1161
+ " <td>0.643303</td>\n",
1162
+ " <td>-0.008696</td>\n",
1163
+ " <td>0.788523</td>\n",
1164
+ " <td>0.702573</td>\n",
1165
+ " <td>0.345736</td>\n",
1166
+ " <td>Africa</td>\n",
1167
+ " </tr>\n",
1168
+ " <tr>\n",
1169
+ " <th>1</th>\n",
1170
+ " <td>Benin</td>\n",
1171
+ " <td>2020</td>\n",
1172
+ " <td>4.407746</td>\n",
1173
+ " <td>8.102292</td>\n",
1174
+ " <td>0.506636</td>\n",
1175
+ " <td>55.099998</td>\n",
1176
+ " <td>0.783115</td>\n",
1177
+ " <td>-0.083489</td>\n",
1178
+ " <td>0.531884</td>\n",
1179
+ " <td>0.608585</td>\n",
1180
+ " <td>0.304512</td>\n",
1181
+ " <td>Africa</td>\n",
1182
+ " </tr>\n",
1183
+ " <tr>\n",
1184
+ " <th>2</th>\n",
1185
+ " <td>United Kingdom</td>\n",
1186
+ " <td>2020</td>\n",
1187
+ " <td>6.798177</td>\n",
1188
+ " <td>10.625811</td>\n",
1189
+ " <td>0.929353</td>\n",
1190
+ " <td>72.699997</td>\n",
1191
+ " <td>0.884624</td>\n",
1192
+ " <td>0.202508</td>\n",
1193
+ " <td>0.490204</td>\n",
1194
+ " <td>0.758164</td>\n",
1195
+ " <td>0.224655</td>\n",
1196
+ " <td>Europe</td>\n",
1197
+ " </tr>\n",
1198
+ " <tr>\n",
1199
+ " <th>3</th>\n",
1200
+ " <td>Philippines</td>\n",
1201
+ " <td>2020</td>\n",
1202
+ " <td>5.079585</td>\n",
1203
+ " <td>9.061443</td>\n",
1204
+ " <td>0.781140</td>\n",
1205
+ " <td>62.099998</td>\n",
1206
+ " <td>0.932042</td>\n",
1207
+ " <td>-0.115543</td>\n",
1208
+ " <td>0.744284</td>\n",
1209
+ " <td>0.803562</td>\n",
1210
+ " <td>0.326889</td>\n",
1211
+ " <td>Asia</td>\n",
1212
+ " </tr>\n",
1213
+ " <tr>\n",
1214
+ " <th>4</th>\n",
1215
+ " <td>Iraq</td>\n",
1216
+ " <td>2020</td>\n",
1217
+ " <td>4.785165</td>\n",
1218
+ " <td>9.167186</td>\n",
1219
+ " <td>0.707847</td>\n",
1220
+ " <td>61.400002</td>\n",
1221
+ " <td>0.700215</td>\n",
1222
+ " <td>-0.020748</td>\n",
1223
+ " <td>0.849109</td>\n",
1224
+ " <td>0.644464</td>\n",
1225
+ " <td>0.531539</td>\n",
1226
+ " <td>Asia</td>\n",
1227
+ " </tr>\n",
1228
+ " </tbody>\n",
1229
+ "</table>\n",
1230
+ "</div>"
1231
+ ],
1232
+ "text/plain": [
1233
+ " Country year Life Ladder Log GDP per capita Social support \\\n",
1234
+ "0 Zimbabwe 2020 3.159802 7.828757 0.717243 \n",
1235
+ "1 Benin 2020 4.407746 8.102292 0.506636 \n",
1236
+ "2 United Kingdom 2020 6.798177 10.625811 0.929353 \n",
1237
+ "3 Philippines 2020 5.079585 9.061443 0.781140 \n",
1238
+ "4 Iraq 2020 4.785165 9.167186 0.707847 \n",
1239
+ "\n",
1240
+ " Healthy life expectancy at birth Freedom to make life choices Generosity \\\n",
1241
+ "0 56.799999 0.643303 -0.008696 \n",
1242
+ "1 55.099998 0.783115 -0.083489 \n",
1243
+ "2 72.699997 0.884624 0.202508 \n",
1244
+ "3 62.099998 0.932042 -0.115543 \n",
1245
+ "4 61.400002 0.700215 -0.020748 \n",
1246
+ "\n",
1247
+ " Perceptions of corruption Positive affect Negative affect Continent \n",
1248
+ "0 0.788523 0.702573 0.345736 Africa \n",
1249
+ "1 0.531884 0.608585 0.304512 Africa \n",
1250
+ "2 0.490204 0.758164 0.224655 Europe \n",
1251
+ "3 0.744284 0.803562 0.326889 Asia \n",
1252
+ "4 0.849109 0.644464 0.531539 Asia "
1253
+ ]
1254
+ },
1255
+ "execution_count": 79,
1256
+ "metadata": {},
1257
+ "output_type": "execute_result"
1258
+ }
1259
+ ],
1260
+ "source": [
1261
+ "df_cont.head()"
1262
+ ]
1263
+ },
1264
+ {
1265
+ "cell_type": "code",
1266
+ "execution_count": 81,
1267
+ "id": "fb26fc2f-f591-4e66-9357-0928c2c46e89",
1268
+ "metadata": {},
1269
+ "outputs": [],
1270
+ "source": [
1271
+ "# I updated the name of the output so that I don't accidentally overwrite the manual work I did at the end to add in the last few outliers.\n",
1272
+ "#df_cont.to_csv(\"Assets/Countries/base-combined-countries.csv\")"
1273
+ ]
1274
+ },
1275
+ {
1276
+ "cell_type": "code",
1277
+ "execution_count": 83,
1278
+ "id": "445a79b2-0023-4812-b606-1ff9cb7720e7",
1279
+ "metadata": {},
1280
+ "outputs": [],
1281
+ "source": [
1282
+ "df3 = df_csv.set_index('Country').join(df_dedup.set_index('Country'), on='Country', how='left')"
1283
+ ]
1284
+ },
1285
+ {
1286
+ "cell_type": "code",
1287
+ "execution_count": 87,
1288
+ "id": "59c3d6bb-11ea-4b4f-9a9e-d9b58561e8f2",
1289
+ "metadata": {},
1290
+ "outputs": [],
1291
+ "source": [
1292
+ "df3 = df3[df3.year.isnull()]"
1293
+ ]
1294
+ },
1295
+ {
1296
+ "cell_type": "code",
1297
+ "execution_count": 88,
1298
+ "id": "3b76dce1-a02f-4b09-bc44-b0e28271bc56",
1299
+ "metadata": {},
1300
+ "outputs": [
1301
+ {
1302
+ "data": {
1303
+ "text/html": [
1304
+ "<div>\n",
1305
+ "<style scoped>\n",
1306
+ " .dataframe tbody tr th:only-of-type {\n",
1307
+ " vertical-align: middle;\n",
1308
+ " }\n",
1309
+ "\n",
1310
+ " .dataframe tbody tr th {\n",
1311
+ " vertical-align: top;\n",
1312
+ " }\n",
1313
+ "\n",
1314
+ " .dataframe thead th {\n",
1315
+ " text-align: right;\n",
1316
+ " }\n",
1317
+ "</style>\n",
1318
+ "<table border=\"1\" class=\"dataframe\">\n",
1319
+ " <thead>\n",
1320
+ " <tr style=\"text-align: right;\">\n",
1321
+ " <th></th>\n",
1322
+ " <th>Continent</th>\n",
1323
+ " <th>year</th>\n",
1324
+ " <th>Life Ladder</th>\n",
1325
+ " <th>Log GDP per capita</th>\n",
1326
+ " <th>Social support</th>\n",
1327
+ " <th>Healthy life expectancy at birth</th>\n",
1328
+ " <th>Freedom to make life choices</th>\n",
1329
+ " <th>Generosity</th>\n",
1330
+ " <th>Perceptions of corruption</th>\n",
1331
+ " <th>Positive affect</th>\n",
1332
+ " <th>Negative affect</th>\n",
1333
+ " </tr>\n",
1334
+ " <tr>\n",
1335
+ " <th>Country</th>\n",
1336
+ " <th></th>\n",
1337
+ " <th></th>\n",
1338
+ " <th></th>\n",
1339
+ " <th></th>\n",
1340
+ " <th></th>\n",
1341
+ " <th></th>\n",
1342
+ " <th></th>\n",
1343
+ " <th></th>\n",
1344
+ " <th></th>\n",
1345
+ " <th></th>\n",
1346
+ " <th></th>\n",
1347
+ " </tr>\n",
1348
+ " </thead>\n",
1349
+ " <tbody>\n",
1350
+ " <tr>\n",
1351
+ " <th>Burkina</th>\n",
1352
+ " <td>Africa</td>\n",
1353
+ " <td>NaN</td>\n",
1354
+ " <td>NaN</td>\n",
1355
+ " <td>NaN</td>\n",
1356
+ " <td>NaN</td>\n",
1357
+ " <td>NaN</td>\n",
1358
+ " <td>NaN</td>\n",
1359
+ " <td>NaN</td>\n",
1360
+ " <td>NaN</td>\n",
1361
+ " <td>NaN</td>\n",
1362
+ " <td>NaN</td>\n",
1363
+ " </tr>\n",
1364
+ " <tr>\n",
1365
+ " <th>Cape Verde</th>\n",
1366
+ " <td>Africa</td>\n",
1367
+ " <td>NaN</td>\n",
1368
+ " <td>NaN</td>\n",
1369
+ " <td>NaN</td>\n",
1370
+ " <td>NaN</td>\n",
1371
+ " <td>NaN</td>\n",
1372
+ " <td>NaN</td>\n",
1373
+ " <td>NaN</td>\n",
1374
+ " <td>NaN</td>\n",
1375
+ " <td>NaN</td>\n",
1376
+ " <td>NaN</td>\n",
1377
+ " </tr>\n",
1378
+ " <tr>\n",
1379
+ " <th>Congo</th>\n",
1380
+ " <td>Africa</td>\n",
1381
+ " <td>NaN</td>\n",
1382
+ " <td>NaN</td>\n",
1383
+ " <td>NaN</td>\n",
1384
+ " <td>NaN</td>\n",
1385
+ " <td>NaN</td>\n",
1386
+ " <td>NaN</td>\n",
1387
+ " <td>NaN</td>\n",
1388
+ " <td>NaN</td>\n",
1389
+ " <td>NaN</td>\n",
1390
+ " <td>NaN</td>\n",
1391
+ " </tr>\n",
1392
+ " <tr>\n",
1393
+ " <th>Congo, Democratic Republic of</th>\n",
1394
+ " <td>Africa</td>\n",
1395
+ " <td>NaN</td>\n",
1396
+ " <td>NaN</td>\n",
1397
+ " <td>NaN</td>\n",
1398
+ " <td>NaN</td>\n",
1399
+ " <td>NaN</td>\n",
1400
+ " <td>NaN</td>\n",
1401
+ " <td>NaN</td>\n",
1402
+ " <td>NaN</td>\n",
1403
+ " <td>NaN</td>\n",
1404
+ " <td>NaN</td>\n",
1405
+ " </tr>\n",
1406
+ " <tr>\n",
1407
+ " <th>Equatorial Guinea</th>\n",
1408
+ " <td>Africa</td>\n",
1409
+ " <td>NaN</td>\n",
1410
+ " <td>NaN</td>\n",
1411
+ " <td>NaN</td>\n",
1412
+ " <td>NaN</td>\n",
1413
+ " <td>NaN</td>\n",
1414
+ " <td>NaN</td>\n",
1415
+ " <td>NaN</td>\n",
1416
+ " <td>NaN</td>\n",
1417
+ " <td>NaN</td>\n",
1418
+ " <td>NaN</td>\n",
1419
+ " </tr>\n",
1420
+ " <tr>\n",
1421
+ " <th>Eritrea</th>\n",
1422
+ " <td>Africa</td>\n",
1423
+ " <td>NaN</td>\n",
1424
+ " <td>NaN</td>\n",
1425
+ " <td>NaN</td>\n",
1426
+ " <td>NaN</td>\n",
1427
+ " <td>NaN</td>\n",
1428
+ " <td>NaN</td>\n",
1429
+ " <td>NaN</td>\n",
1430
+ " <td>NaN</td>\n",
1431
+ " <td>NaN</td>\n",
1432
+ " <td>NaN</td>\n",
1433
+ " </tr>\n",
1434
+ " <tr>\n",
1435
+ " <th>Guinea-Bissau</th>\n",
1436
+ " <td>Africa</td>\n",
1437
+ " <td>NaN</td>\n",
1438
+ " <td>NaN</td>\n",
1439
+ " <td>NaN</td>\n",
1440
+ " <td>NaN</td>\n",
1441
+ " <td>NaN</td>\n",
1442
+ " <td>NaN</td>\n",
1443
+ " <td>NaN</td>\n",
1444
+ " <td>NaN</td>\n",
1445
+ " <td>NaN</td>\n",
1446
+ " <td>NaN</td>\n",
1447
+ " </tr>\n",
1448
+ " <tr>\n",
1449
+ " <th>Sao Tome and Principe</th>\n",
1450
+ " <td>Africa</td>\n",
1451
+ " <td>NaN</td>\n",
1452
+ " <td>NaN</td>\n",
1453
+ " <td>NaN</td>\n",
1454
+ " <td>NaN</td>\n",
1455
+ " <td>NaN</td>\n",
1456
+ " <td>NaN</td>\n",
1457
+ " <td>NaN</td>\n",
1458
+ " <td>NaN</td>\n",
1459
+ " <td>NaN</td>\n",
1460
+ " <td>NaN</td>\n",
1461
+ " </tr>\n",
1462
+ " <tr>\n",
1463
+ " <th>Seychelles</th>\n",
1464
+ " <td>Africa</td>\n",
1465
+ " <td>NaN</td>\n",
1466
+ " <td>NaN</td>\n",
1467
+ " <td>NaN</td>\n",
1468
+ " <td>NaN</td>\n",
1469
+ " <td>NaN</td>\n",
1470
+ " <td>NaN</td>\n",
1471
+ " <td>NaN</td>\n",
1472
+ " <td>NaN</td>\n",
1473
+ " <td>NaN</td>\n",
1474
+ " <td>NaN</td>\n",
1475
+ " </tr>\n",
1476
+ " <tr>\n",
1477
+ " <th>Brunei</th>\n",
1478
+ " <td>Asia</td>\n",
1479
+ " <td>NaN</td>\n",
1480
+ " <td>NaN</td>\n",
1481
+ " <td>NaN</td>\n",
1482
+ " <td>NaN</td>\n",
1483
+ " <td>NaN</td>\n",
1484
+ " <td>NaN</td>\n",
1485
+ " <td>NaN</td>\n",
1486
+ " <td>NaN</td>\n",
1487
+ " <td>NaN</td>\n",
1488
+ " <td>NaN</td>\n",
1489
+ " </tr>\n",
1490
+ " <tr>\n",
1491
+ " <th>Burma (Myanmar)</th>\n",
1492
+ " <td>Asia</td>\n",
1493
+ " <td>NaN</td>\n",
1494
+ " <td>NaN</td>\n",
1495
+ " <td>NaN</td>\n",
1496
+ " <td>NaN</td>\n",
1497
+ " <td>NaN</td>\n",
1498
+ " <td>NaN</td>\n",
1499
+ " <td>NaN</td>\n",
1500
+ " <td>NaN</td>\n",
1501
+ " <td>NaN</td>\n",
1502
+ " <td>NaN</td>\n",
1503
+ " </tr>\n",
1504
+ " <tr>\n",
1505
+ " <th>East Timor</th>\n",
1506
+ " <td>Asia</td>\n",
1507
+ " <td>NaN</td>\n",
1508
+ " <td>NaN</td>\n",
1509
+ " <td>NaN</td>\n",
1510
+ " <td>NaN</td>\n",
1511
+ " <td>NaN</td>\n",
1512
+ " <td>NaN</td>\n",
1513
+ " <td>NaN</td>\n",
1514
+ " <td>NaN</td>\n",
1515
+ " <td>NaN</td>\n",
1516
+ " <td>NaN</td>\n",
1517
+ " </tr>\n",
1518
+ " <tr>\n",
1519
+ " <th>Korea, North</th>\n",
1520
+ " <td>Asia</td>\n",
1521
+ " <td>NaN</td>\n",
1522
+ " <td>NaN</td>\n",
1523
+ " <td>NaN</td>\n",
1524
+ " <td>NaN</td>\n",
1525
+ " <td>NaN</td>\n",
1526
+ " <td>NaN</td>\n",
1527
+ " <td>NaN</td>\n",
1528
+ " <td>NaN</td>\n",
1529
+ " <td>NaN</td>\n",
1530
+ " <td>NaN</td>\n",
1531
+ " </tr>\n",
1532
+ " <tr>\n",
1533
+ " <th>Korea, South</th>\n",
1534
+ " <td>Asia</td>\n",
1535
+ " <td>NaN</td>\n",
1536
+ " <td>NaN</td>\n",
1537
+ " <td>NaN</td>\n",
1538
+ " <td>NaN</td>\n",
1539
+ " <td>NaN</td>\n",
1540
+ " <td>NaN</td>\n",
1541
+ " <td>NaN</td>\n",
1542
+ " <td>NaN</td>\n",
1543
+ " <td>NaN</td>\n",
1544
+ " <td>NaN</td>\n",
1545
+ " </tr>\n",
1546
+ " <tr>\n",
1547
+ " <th>Russian Federation</th>\n",
1548
+ " <td>Asia</td>\n",
1549
+ " <td>NaN</td>\n",
1550
+ " <td>NaN</td>\n",
1551
+ " <td>NaN</td>\n",
1552
+ " <td>NaN</td>\n",
1553
+ " <td>NaN</td>\n",
1554
+ " <td>NaN</td>\n",
1555
+ " <td>NaN</td>\n",
1556
+ " <td>NaN</td>\n",
1557
+ " <td>NaN</td>\n",
1558
+ " <td>NaN</td>\n",
1559
+ " </tr>\n",
1560
+ " <tr>\n",
1561
+ " <th>Andorra</th>\n",
1562
+ " <td>Europe</td>\n",
1563
+ " <td>NaN</td>\n",
1564
+ " <td>NaN</td>\n",
1565
+ " <td>NaN</td>\n",
1566
+ " <td>NaN</td>\n",
1567
+ " <td>NaN</td>\n",
1568
+ " <td>NaN</td>\n",
1569
+ " <td>NaN</td>\n",
1570
+ " <td>NaN</td>\n",
1571
+ " <td>NaN</td>\n",
1572
+ " <td>NaN</td>\n",
1573
+ " </tr>\n",
1574
+ " <tr>\n",
1575
+ " <th>CZ</th>\n",
1576
+ " <td>Europe</td>\n",
1577
+ " <td>NaN</td>\n",
1578
+ " <td>NaN</td>\n",
1579
+ " <td>NaN</td>\n",
1580
+ " <td>NaN</td>\n",
1581
+ " <td>NaN</td>\n",
1582
+ " <td>NaN</td>\n",
1583
+ " <td>NaN</td>\n",
1584
+ " <td>NaN</td>\n",
1585
+ " <td>NaN</td>\n",
1586
+ " <td>NaN</td>\n",
1587
+ " </tr>\n",
1588
+ " <tr>\n",
1589
+ " <th>Liechtenstein</th>\n",
1590
+ " <td>Europe</td>\n",
1591
+ " <td>NaN</td>\n",
1592
+ " <td>NaN</td>\n",
1593
+ " <td>NaN</td>\n",
1594
+ " <td>NaN</td>\n",
1595
+ " <td>NaN</td>\n",
1596
+ " <td>NaN</td>\n",
1597
+ " <td>NaN</td>\n",
1598
+ " <td>NaN</td>\n",
1599
+ " <td>NaN</td>\n",
1600
+ " <td>NaN</td>\n",
1601
+ " </tr>\n",
1602
+ " <tr>\n",
1603
+ " <th>Macedonia</th>\n",
1604
+ " <td>Europe</td>\n",
1605
+ " <td>NaN</td>\n",
1606
+ " <td>NaN</td>\n",
1607
+ " <td>NaN</td>\n",
1608
+ " <td>NaN</td>\n",
1609
+ " <td>NaN</td>\n",
1610
+ " <td>NaN</td>\n",
1611
+ " <td>NaN</td>\n",
1612
+ " <td>NaN</td>\n",
1613
+ " <td>NaN</td>\n",
1614
+ " <td>NaN</td>\n",
1615
+ " </tr>\n",
1616
+ " <tr>\n",
1617
+ " <th>Monaco</th>\n",
1618
+ " <td>Europe</td>\n",
1619
+ " <td>NaN</td>\n",
1620
+ " <td>NaN</td>\n",
1621
+ " <td>NaN</td>\n",
1622
+ " <td>NaN</td>\n",
1623
+ " <td>NaN</td>\n",
1624
+ " <td>NaN</td>\n",
1625
+ " <td>NaN</td>\n",
1626
+ " <td>NaN</td>\n",
1627
+ " <td>NaN</td>\n",
1628
+ " <td>NaN</td>\n",
1629
+ " </tr>\n",
1630
+ " <tr>\n",
1631
+ " <th>San Marino</th>\n",
1632
+ " <td>Europe</td>\n",
1633
+ " <td>NaN</td>\n",
1634
+ " <td>NaN</td>\n",
1635
+ " <td>NaN</td>\n",
1636
+ " <td>NaN</td>\n",
1637
+ " <td>NaN</td>\n",
1638
+ " <td>NaN</td>\n",
1639
+ " <td>NaN</td>\n",
1640
+ " <td>NaN</td>\n",
1641
+ " <td>NaN</td>\n",
1642
+ " <td>NaN</td>\n",
1643
+ " </tr>\n",
1644
+ " <tr>\n",
1645
+ " <th>Vatican City</th>\n",
1646
+ " <td>Europe</td>\n",
1647
+ " <td>NaN</td>\n",
1648
+ " <td>NaN</td>\n",
1649
+ " <td>NaN</td>\n",
1650
+ " <td>NaN</td>\n",
1651
+ " <td>NaN</td>\n",
1652
+ " <td>NaN</td>\n",
1653
+ " <td>NaN</td>\n",
1654
+ " <td>NaN</td>\n",
1655
+ " <td>NaN</td>\n",
1656
+ " <td>NaN</td>\n",
1657
+ " </tr>\n",
1658
+ " <tr>\n",
1659
+ " <th>Antigua and Barbuda</th>\n",
1660
+ " <td>North America</td>\n",
1661
+ " <td>NaN</td>\n",
1662
+ " <td>NaN</td>\n",
1663
+ " <td>NaN</td>\n",
1664
+ " <td>NaN</td>\n",
1665
+ " <td>NaN</td>\n",
1666
+ " <td>NaN</td>\n",
1667
+ " <td>NaN</td>\n",
1668
+ " <td>NaN</td>\n",
1669
+ " <td>NaN</td>\n",
1670
+ " <td>NaN</td>\n",
1671
+ " </tr>\n",
1672
+ " <tr>\n",
1673
+ " <th>Bahamas</th>\n",
1674
+ " <td>North America</td>\n",
1675
+ " <td>NaN</td>\n",
1676
+ " <td>NaN</td>\n",
1677
+ " <td>NaN</td>\n",
1678
+ " <td>NaN</td>\n",
1679
+ " <td>NaN</td>\n",
1680
+ " <td>NaN</td>\n",
1681
+ " <td>NaN</td>\n",
1682
+ " <td>NaN</td>\n",
1683
+ " <td>NaN</td>\n",
1684
+ " <td>NaN</td>\n",
1685
+ " </tr>\n",
1686
+ " <tr>\n",
1687
+ " <th>Barbados</th>\n",
1688
+ " <td>North America</td>\n",
1689
+ " <td>NaN</td>\n",
1690
+ " <td>NaN</td>\n",
1691
+ " <td>NaN</td>\n",
1692
+ " <td>NaN</td>\n",
1693
+ " <td>NaN</td>\n",
1694
+ " <td>NaN</td>\n",
1695
+ " <td>NaN</td>\n",
1696
+ " <td>NaN</td>\n",
1697
+ " <td>NaN</td>\n",
1698
+ " <td>NaN</td>\n",
1699
+ " </tr>\n",
1700
+ " <tr>\n",
1701
+ " <th>Dominica</th>\n",
1702
+ " <td>North America</td>\n",
1703
+ " <td>NaN</td>\n",
1704
+ " <td>NaN</td>\n",
1705
+ " <td>NaN</td>\n",
1706
+ " <td>NaN</td>\n",
1707
+ " <td>NaN</td>\n",
1708
+ " <td>NaN</td>\n",
1709
+ " <td>NaN</td>\n",
1710
+ " <td>NaN</td>\n",
1711
+ " <td>NaN</td>\n",
1712
+ " <td>NaN</td>\n",
1713
+ " </tr>\n",
1714
+ " <tr>\n",
1715
+ " <th>Grenada</th>\n",
1716
+ " <td>North America</td>\n",
1717
+ " <td>NaN</td>\n",
1718
+ " <td>NaN</td>\n",
1719
+ " <td>NaN</td>\n",
1720
+ " <td>NaN</td>\n",
1721
+ " <td>NaN</td>\n",
1722
+ " <td>NaN</td>\n",
1723
+ " <td>NaN</td>\n",
1724
+ " <td>NaN</td>\n",
1725
+ " <td>NaN</td>\n",
1726
+ " <td>NaN</td>\n",
1727
+ " </tr>\n",
1728
+ " <tr>\n",
1729
+ " <th>Saint Kitts and Nevis</th>\n",
1730
+ " <td>North America</td>\n",
1731
+ " <td>NaN</td>\n",
1732
+ " <td>NaN</td>\n",
1733
+ " <td>NaN</td>\n",
1734
+ " <td>NaN</td>\n",
1735
+ " <td>NaN</td>\n",
1736
+ " <td>NaN</td>\n",
1737
+ " <td>NaN</td>\n",
1738
+ " <td>NaN</td>\n",
1739
+ " <td>NaN</td>\n",
1740
+ " <td>NaN</td>\n",
1741
+ " </tr>\n",
1742
+ " <tr>\n",
1743
+ " <th>Saint Lucia</th>\n",
1744
+ " <td>North America</td>\n",
1745
+ " <td>NaN</td>\n",
1746
+ " <td>NaN</td>\n",
1747
+ " <td>NaN</td>\n",
1748
+ " <td>NaN</td>\n",
1749
+ " <td>NaN</td>\n",
1750
+ " <td>NaN</td>\n",
1751
+ " <td>NaN</td>\n",
1752
+ " <td>NaN</td>\n",
1753
+ " <td>NaN</td>\n",
1754
+ " <td>NaN</td>\n",
1755
+ " </tr>\n",
1756
+ " <tr>\n",
1757
+ " <th>Saint Vincent and the Grenadines</th>\n",
1758
+ " <td>North America</td>\n",
1759
+ " <td>NaN</td>\n",
1760
+ " <td>NaN</td>\n",
1761
+ " <td>NaN</td>\n",
1762
+ " <td>NaN</td>\n",
1763
+ " <td>NaN</td>\n",
1764
+ " <td>NaN</td>\n",
1765
+ " <td>NaN</td>\n",
1766
+ " <td>NaN</td>\n",
1767
+ " <td>NaN</td>\n",
1768
+ " <td>NaN</td>\n",
1769
+ " </tr>\n",
1770
+ " <tr>\n",
1771
+ " <th>US</th>\n",
1772
+ " <td>North America</td>\n",
1773
+ " <td>NaN</td>\n",
1774
+ " <td>NaN</td>\n",
1775
+ " <td>NaN</td>\n",
1776
+ " <td>NaN</td>\n",
1777
+ " <td>NaN</td>\n",
1778
+ " <td>NaN</td>\n",
1779
+ " <td>NaN</td>\n",
1780
+ " <td>NaN</td>\n",
1781
+ " <td>NaN</td>\n",
1782
+ " <td>NaN</td>\n",
1783
+ " </tr>\n",
1784
+ " <tr>\n",
1785
+ " <th>Fiji</th>\n",
1786
+ " <td>Oceania</td>\n",
1787
+ " <td>NaN</td>\n",
1788
+ " <td>NaN</td>\n",
1789
+ " <td>NaN</td>\n",
1790
+ " <td>NaN</td>\n",
1791
+ " <td>NaN</td>\n",
1792
+ " <td>NaN</td>\n",
1793
+ " <td>NaN</td>\n",
1794
+ " <td>NaN</td>\n",
1795
+ " <td>NaN</td>\n",
1796
+ " <td>NaN</td>\n",
1797
+ " </tr>\n",
1798
+ " <tr>\n",
1799
+ " <th>Kiribati</th>\n",
1800
+ " <td>Oceania</td>\n",
1801
+ " <td>NaN</td>\n",
1802
+ " <td>NaN</td>\n",
1803
+ " <td>NaN</td>\n",
1804
+ " <td>NaN</td>\n",
1805
+ " <td>NaN</td>\n",
1806
+ " <td>NaN</td>\n",
1807
+ " <td>NaN</td>\n",
1808
+ " <td>NaN</td>\n",
1809
+ " <td>NaN</td>\n",
1810
+ " <td>NaN</td>\n",
1811
+ " </tr>\n",
1812
+ " <tr>\n",
1813
+ " <th>Marshall Islands</th>\n",
1814
+ " <td>Oceania</td>\n",
1815
+ " <td>NaN</td>\n",
1816
+ " <td>NaN</td>\n",
1817
+ " <td>NaN</td>\n",
1818
+ " <td>NaN</td>\n",
1819
+ " <td>NaN</td>\n",
1820
+ " <td>NaN</td>\n",
1821
+ " <td>NaN</td>\n",
1822
+ " <td>NaN</td>\n",
1823
+ " <td>NaN</td>\n",
1824
+ " <td>NaN</td>\n",
1825
+ " </tr>\n",
1826
+ " <tr>\n",
1827
+ " <th>Micronesia</th>\n",
1828
+ " <td>Oceania</td>\n",
1829
+ " <td>NaN</td>\n",
1830
+ " <td>NaN</td>\n",
1831
+ " <td>NaN</td>\n",
1832
+ " <td>NaN</td>\n",
1833
+ " <td>NaN</td>\n",
1834
+ " <td>NaN</td>\n",
1835
+ " <td>NaN</td>\n",
1836
+ " <td>NaN</td>\n",
1837
+ " <td>NaN</td>\n",
1838
+ " <td>NaN</td>\n",
1839
+ " </tr>\n",
1840
+ " <tr>\n",
1841
+ " <th>Nauru</th>\n",
1842
+ " <td>Oceania</td>\n",
1843
+ " <td>NaN</td>\n",
1844
+ " <td>NaN</td>\n",
1845
+ " <td>NaN</td>\n",
1846
+ " <td>NaN</td>\n",
1847
+ " <td>NaN</td>\n",
1848
+ " <td>NaN</td>\n",
1849
+ " <td>NaN</td>\n",
1850
+ " <td>NaN</td>\n",
1851
+ " <td>NaN</td>\n",
1852
+ " <td>NaN</td>\n",
1853
+ " </tr>\n",
1854
+ " <tr>\n",
1855
+ " <th>Palau</th>\n",
1856
+ " <td>Oceania</td>\n",
1857
+ " <td>NaN</td>\n",
1858
+ " <td>NaN</td>\n",
1859
+ " <td>NaN</td>\n",
1860
+ " <td>NaN</td>\n",
1861
+ " <td>NaN</td>\n",
1862
+ " <td>NaN</td>\n",
1863
+ " <td>NaN</td>\n",
1864
+ " <td>NaN</td>\n",
1865
+ " <td>NaN</td>\n",
1866
+ " <td>NaN</td>\n",
1867
+ " </tr>\n",
1868
+ " <tr>\n",
1869
+ " <th>Papua New Guinea</th>\n",
1870
+ " <td>Oceania</td>\n",
1871
+ " <td>NaN</td>\n",
1872
+ " <td>NaN</td>\n",
1873
+ " <td>NaN</td>\n",
1874
+ " <td>NaN</td>\n",
1875
+ " <td>NaN</td>\n",
1876
+ " <td>NaN</td>\n",
1877
+ " <td>NaN</td>\n",
1878
+ " <td>NaN</td>\n",
1879
+ " <td>NaN</td>\n",
1880
+ " <td>NaN</td>\n",
1881
+ " </tr>\n",
1882
+ " <tr>\n",
1883
+ " <th>Samoa</th>\n",
1884
+ " <td>Oceania</td>\n",
1885
+ " <td>NaN</td>\n",
1886
+ " <td>NaN</td>\n",
1887
+ " <td>NaN</td>\n",
1888
+ " <td>NaN</td>\n",
1889
+ " <td>NaN</td>\n",
1890
+ " <td>NaN</td>\n",
1891
+ " <td>NaN</td>\n",
1892
+ " <td>NaN</td>\n",
1893
+ " <td>NaN</td>\n",
1894
+ " <td>NaN</td>\n",
1895
+ " </tr>\n",
1896
+ " <tr>\n",
1897
+ " <th>Solomon Islands</th>\n",
1898
+ " <td>Oceania</td>\n",
1899
+ " <td>NaN</td>\n",
1900
+ " <td>NaN</td>\n",
1901
+ " <td>NaN</td>\n",
1902
+ " <td>NaN</td>\n",
1903
+ " <td>NaN</td>\n",
1904
+ " <td>NaN</td>\n",
1905
+ " <td>NaN</td>\n",
1906
+ " <td>NaN</td>\n",
1907
+ " <td>NaN</td>\n",
1908
+ " <td>NaN</td>\n",
1909
+ " </tr>\n",
1910
+ " <tr>\n",
1911
+ " <th>Tonga</th>\n",
1912
+ " <td>Oceania</td>\n",
1913
+ " <td>NaN</td>\n",
1914
+ " <td>NaN</td>\n",
1915
+ " <td>NaN</td>\n",
1916
+ " <td>NaN</td>\n",
1917
+ " <td>NaN</td>\n",
1918
+ " <td>NaN</td>\n",
1919
+ " <td>NaN</td>\n",
1920
+ " <td>NaN</td>\n",
1921
+ " <td>NaN</td>\n",
1922
+ " <td>NaN</td>\n",
1923
+ " </tr>\n",
1924
+ " <tr>\n",
1925
+ " <th>Tuvalu</th>\n",
1926
+ " <td>Oceania</td>\n",
1927
+ " <td>NaN</td>\n",
1928
+ " <td>NaN</td>\n",
1929
+ " <td>NaN</td>\n",
1930
+ " <td>NaN</td>\n",
1931
+ " <td>NaN</td>\n",
1932
+ " <td>NaN</td>\n",
1933
+ " <td>NaN</td>\n",
1934
+ " <td>NaN</td>\n",
1935
+ " <td>NaN</td>\n",
1936
+ " <td>NaN</td>\n",
1937
+ " </tr>\n",
1938
+ " <tr>\n",
1939
+ " <th>Vanuatu</th>\n",
1940
+ " <td>Oceania</td>\n",
1941
+ " <td>NaN</td>\n",
1942
+ " <td>NaN</td>\n",
1943
+ " <td>NaN</td>\n",
1944
+ " <td>NaN</td>\n",
1945
+ " <td>NaN</td>\n",
1946
+ " <td>NaN</td>\n",
1947
+ " <td>NaN</td>\n",
1948
+ " <td>NaN</td>\n",
1949
+ " <td>NaN</td>\n",
1950
+ " <td>NaN</td>\n",
1951
+ " </tr>\n",
1952
+ " </tbody>\n",
1953
+ "</table>\n",
1954
+ "</div>"
1955
+ ],
1956
+ "text/plain": [
1957
+ " Continent year Life Ladder \\\n",
1958
+ "Country \n",
1959
+ "Burkina Africa NaN NaN \n",
1960
+ "Cape Verde Africa NaN NaN \n",
1961
+ "Congo Africa NaN NaN \n",
1962
+ "Congo, Democratic Republic of Africa NaN NaN \n",
1963
+ "Equatorial Guinea Africa NaN NaN \n",
1964
+ "Eritrea Africa NaN NaN \n",
1965
+ "Guinea-Bissau Africa NaN NaN \n",
1966
+ "Sao Tome and Principe Africa NaN NaN \n",
1967
+ "Seychelles Africa NaN NaN \n",
1968
+ "Brunei Asia NaN NaN \n",
1969
+ "Burma (Myanmar) Asia NaN NaN \n",
1970
+ "East Timor Asia NaN NaN \n",
1971
+ "Korea, North Asia NaN NaN \n",
1972
+ "Korea, South Asia NaN NaN \n",
1973
+ "Russian Federation Asia NaN NaN \n",
1974
+ "Andorra Europe NaN NaN \n",
1975
+ "CZ Europe NaN NaN \n",
1976
+ "Liechtenstein Europe NaN NaN \n",
1977
+ "Macedonia Europe NaN NaN \n",
1978
+ "Monaco Europe NaN NaN \n",
1979
+ "San Marino Europe NaN NaN \n",
1980
+ "Vatican City Europe NaN NaN \n",
1981
+ "Antigua and Barbuda North America NaN NaN \n",
1982
+ "Bahamas North America NaN NaN \n",
1983
+ "Barbados North America NaN NaN \n",
1984
+ "Dominica North America NaN NaN \n",
1985
+ "Grenada North America NaN NaN \n",
1986
+ "Saint Kitts and Nevis North America NaN NaN \n",
1987
+ "Saint Lucia North America NaN NaN \n",
1988
+ "Saint Vincent and the Grenadines North America NaN NaN \n",
1989
+ "US North America NaN NaN \n",
1990
+ "Fiji Oceania NaN NaN \n",
1991
+ "Kiribati Oceania NaN NaN \n",
1992
+ "Marshall Islands Oceania NaN NaN \n",
1993
+ "Micronesia Oceania NaN NaN \n",
1994
+ "Nauru Oceania NaN NaN \n",
1995
+ "Palau Oceania NaN NaN \n",
1996
+ "Papua New Guinea Oceania NaN NaN \n",
1997
+ "Samoa Oceania NaN NaN \n",
1998
+ "Solomon Islands Oceania NaN NaN \n",
1999
+ "Tonga Oceania NaN NaN \n",
2000
+ "Tuvalu Oceania NaN NaN \n",
2001
+ "Vanuatu Oceania NaN NaN \n",
2002
+ "\n",
2003
+ " Log GDP per capita Social support \\\n",
2004
+ "Country \n",
2005
+ "Burkina NaN NaN \n",
2006
+ "Cape Verde NaN NaN \n",
2007
+ "Congo NaN NaN \n",
2008
+ "Congo, Democratic Republic of NaN NaN \n",
2009
+ "Equatorial Guinea NaN NaN \n",
2010
+ "Eritrea NaN NaN \n",
2011
+ "Guinea-Bissau NaN NaN \n",
2012
+ "Sao Tome and Principe NaN NaN \n",
2013
+ "Seychelles NaN NaN \n",
2014
+ "Brunei NaN NaN \n",
2015
+ "Burma (Myanmar) NaN NaN \n",
2016
+ "East Timor NaN NaN \n",
2017
+ "Korea, North NaN NaN \n",
2018
+ "Korea, South NaN NaN \n",
2019
+ "Russian Federation NaN NaN \n",
2020
+ "Andorra NaN NaN \n",
2021
+ "CZ NaN NaN \n",
2022
+ "Liechtenstein NaN NaN \n",
2023
+ "Macedonia NaN NaN \n",
2024
+ "Monaco NaN NaN \n",
2025
+ "San Marino NaN NaN \n",
2026
+ "Vatican City NaN NaN \n",
2027
+ "Antigua and Barbuda NaN NaN \n",
2028
+ "Bahamas NaN NaN \n",
2029
+ "Barbados NaN NaN \n",
2030
+ "Dominica NaN NaN \n",
2031
+ "Grenada NaN NaN \n",
2032
+ "Saint Kitts and Nevis NaN NaN \n",
2033
+ "Saint Lucia NaN NaN \n",
2034
+ "Saint Vincent and the Grenadines NaN NaN \n",
2035
+ "US NaN NaN \n",
2036
+ "Fiji NaN NaN \n",
2037
+ "Kiribati NaN NaN \n",
2038
+ "Marshall Islands NaN NaN \n",
2039
+ "Micronesia NaN NaN \n",
2040
+ "Nauru NaN NaN \n",
2041
+ "Palau NaN NaN \n",
2042
+ "Papua New Guinea NaN NaN \n",
2043
+ "Samoa NaN NaN \n",
2044
+ "Solomon Islands NaN NaN \n",
2045
+ "Tonga NaN NaN \n",
2046
+ "Tuvalu NaN NaN \n",
2047
+ "Vanuatu NaN NaN \n",
2048
+ "\n",
2049
+ " Healthy life expectancy at birth \\\n",
2050
+ "Country \n",
2051
+ "Burkina NaN \n",
2052
+ "Cape Verde NaN \n",
2053
+ "Congo NaN \n",
2054
+ "Congo, Democratic Republic of NaN \n",
2055
+ "Equatorial Guinea NaN \n",
2056
+ "Eritrea NaN \n",
2057
+ "Guinea-Bissau NaN \n",
2058
+ "Sao Tome and Principe NaN \n",
2059
+ "Seychelles NaN \n",
2060
+ "Brunei NaN \n",
2061
+ "Burma (Myanmar) NaN \n",
2062
+ "East Timor NaN \n",
2063
+ "Korea, North NaN \n",
2064
+ "Korea, South NaN \n",
2065
+ "Russian Federation NaN \n",
2066
+ "Andorra NaN \n",
2067
+ "CZ NaN \n",
2068
+ "Liechtenstein NaN \n",
2069
+ "Macedonia NaN \n",
2070
+ "Monaco NaN \n",
2071
+ "San Marino NaN \n",
2072
+ "Vatican City NaN \n",
2073
+ "Antigua and Barbuda NaN \n",
2074
+ "Bahamas NaN \n",
2075
+ "Barbados NaN \n",
2076
+ "Dominica NaN \n",
2077
+ "Grenada NaN \n",
2078
+ "Saint Kitts and Nevis NaN \n",
2079
+ "Saint Lucia NaN \n",
2080
+ "Saint Vincent and the Grenadines NaN \n",
2081
+ "US NaN \n",
2082
+ "Fiji NaN \n",
2083
+ "Kiribati NaN \n",
2084
+ "Marshall Islands NaN \n",
2085
+ "Micronesia NaN \n",
2086
+ "Nauru NaN \n",
2087
+ "Palau NaN \n",
2088
+ "Papua New Guinea NaN \n",
2089
+ "Samoa NaN \n",
2090
+ "Solomon Islands NaN \n",
2091
+ "Tonga NaN \n",
2092
+ "Tuvalu NaN \n",
2093
+ "Vanuatu NaN \n",
2094
+ "\n",
2095
+ " Freedom to make life choices Generosity \\\n",
2096
+ "Country \n",
2097
+ "Burkina NaN NaN \n",
2098
+ "Cape Verde NaN NaN \n",
2099
+ "Congo NaN NaN \n",
2100
+ "Congo, Democratic Republic of NaN NaN \n",
2101
+ "Equatorial Guinea NaN NaN \n",
2102
+ "Eritrea NaN NaN \n",
2103
+ "Guinea-Bissau NaN NaN \n",
2104
+ "Sao Tome and Principe NaN NaN \n",
2105
+ "Seychelles NaN NaN \n",
2106
+ "Brunei NaN NaN \n",
2107
+ "Burma (Myanmar) NaN NaN \n",
2108
+ "East Timor NaN NaN \n",
2109
+ "Korea, North NaN NaN \n",
2110
+ "Korea, South NaN NaN \n",
2111
+ "Russian Federation NaN NaN \n",
2112
+ "Andorra NaN NaN \n",
2113
+ "CZ NaN NaN \n",
2114
+ "Liechtenstein NaN NaN \n",
2115
+ "Macedonia NaN NaN \n",
2116
+ "Monaco NaN NaN \n",
2117
+ "San Marino NaN NaN \n",
2118
+ "Vatican City NaN NaN \n",
2119
+ "Antigua and Barbuda NaN NaN \n",
2120
+ "Bahamas NaN NaN \n",
2121
+ "Barbados NaN NaN \n",
2122
+ "Dominica NaN NaN \n",
2123
+ "Grenada NaN NaN \n",
2124
+ "Saint Kitts and Nevis NaN NaN \n",
2125
+ "Saint Lucia NaN NaN \n",
2126
+ "Saint Vincent and the Grenadines NaN NaN \n",
2127
+ "US NaN NaN \n",
2128
+ "Fiji NaN NaN \n",
2129
+ "Kiribati NaN NaN \n",
2130
+ "Marshall Islands NaN NaN \n",
2131
+ "Micronesia NaN NaN \n",
2132
+ "Nauru NaN NaN \n",
2133
+ "Palau NaN NaN \n",
2134
+ "Papua New Guinea NaN NaN \n",
2135
+ "Samoa NaN NaN \n",
2136
+ "Solomon Islands NaN NaN \n",
2137
+ "Tonga NaN NaN \n",
2138
+ "Tuvalu NaN NaN \n",
2139
+ "Vanuatu NaN NaN \n",
2140
+ "\n",
2141
+ " Perceptions of corruption Positive affect \\\n",
2142
+ "Country \n",
2143
+ "Burkina NaN NaN \n",
2144
+ "Cape Verde NaN NaN \n",
2145
+ "Congo NaN NaN \n",
2146
+ "Congo, Democratic Republic of NaN NaN \n",
2147
+ "Equatorial Guinea NaN NaN \n",
2148
+ "Eritrea NaN NaN \n",
2149
+ "Guinea-Bissau NaN NaN \n",
2150
+ "Sao Tome and Principe NaN NaN \n",
2151
+ "Seychelles NaN NaN \n",
2152
+ "Brunei NaN NaN \n",
2153
+ "Burma (Myanmar) NaN NaN \n",
2154
+ "East Timor NaN NaN \n",
2155
+ "Korea, North NaN NaN \n",
2156
+ "Korea, South NaN NaN \n",
2157
+ "Russian Federation NaN NaN \n",
2158
+ "Andorra NaN NaN \n",
2159
+ "CZ NaN NaN \n",
2160
+ "Liechtenstein NaN NaN \n",
2161
+ "Macedonia NaN NaN \n",
2162
+ "Monaco NaN NaN \n",
2163
+ "San Marino NaN NaN \n",
2164
+ "Vatican City NaN NaN \n",
2165
+ "Antigua and Barbuda NaN NaN \n",
2166
+ "Bahamas NaN NaN \n",
2167
+ "Barbados NaN NaN \n",
2168
+ "Dominica NaN NaN \n",
2169
+ "Grenada NaN NaN \n",
2170
+ "Saint Kitts and Nevis NaN NaN \n",
2171
+ "Saint Lucia NaN NaN \n",
2172
+ "Saint Vincent and the Grenadines NaN NaN \n",
2173
+ "US NaN NaN \n",
2174
+ "Fiji NaN NaN \n",
2175
+ "Kiribati NaN NaN \n",
2176
+ "Marshall Islands NaN NaN \n",
2177
+ "Micronesia NaN NaN \n",
2178
+ "Nauru NaN NaN \n",
2179
+ "Palau NaN NaN \n",
2180
+ "Papua New Guinea NaN NaN \n",
2181
+ "Samoa NaN NaN \n",
2182
+ "Solomon Islands NaN NaN \n",
2183
+ "Tonga NaN NaN \n",
2184
+ "Tuvalu NaN NaN \n",
2185
+ "Vanuatu NaN NaN \n",
2186
+ "\n",
2187
+ " Negative affect \n",
2188
+ "Country \n",
2189
+ "Burkina NaN \n",
2190
+ "Cape Verde NaN \n",
2191
+ "Congo NaN \n",
2192
+ "Congo, Democratic Republic of NaN \n",
2193
+ "Equatorial Guinea NaN \n",
2194
+ "Eritrea NaN \n",
2195
+ "Guinea-Bissau NaN \n",
2196
+ "Sao Tome and Principe NaN \n",
2197
+ "Seychelles NaN \n",
2198
+ "Brunei NaN \n",
2199
+ "Burma (Myanmar) NaN \n",
2200
+ "East Timor NaN \n",
2201
+ "Korea, North NaN \n",
2202
+ "Korea, South NaN \n",
2203
+ "Russian Federation NaN \n",
2204
+ "Andorra NaN \n",
2205
+ "CZ NaN \n",
2206
+ "Liechtenstein NaN \n",
2207
+ "Macedonia NaN \n",
2208
+ "Monaco NaN \n",
2209
+ "San Marino NaN \n",
2210
+ "Vatican City NaN \n",
2211
+ "Antigua and Barbuda NaN \n",
2212
+ "Bahamas NaN \n",
2213
+ "Barbados NaN \n",
2214
+ "Dominica NaN \n",
2215
+ "Grenada NaN \n",
2216
+ "Saint Kitts and Nevis NaN \n",
2217
+ "Saint Lucia NaN \n",
2218
+ "Saint Vincent and the Grenadines NaN \n",
2219
+ "US NaN \n",
2220
+ "Fiji NaN \n",
2221
+ "Kiribati NaN \n",
2222
+ "Marshall Islands NaN \n",
2223
+ "Micronesia NaN \n",
2224
+ "Nauru NaN \n",
2225
+ "Palau NaN \n",
2226
+ "Papua New Guinea NaN \n",
2227
+ "Samoa NaN \n",
2228
+ "Solomon Islands NaN \n",
2229
+ "Tonga NaN \n",
2230
+ "Tuvalu NaN \n",
2231
+ "Vanuatu NaN "
2232
+ ]
2233
+ },
2234
+ "execution_count": 88,
2235
+ "metadata": {},
2236
+ "output_type": "execute_result"
2237
+ }
2238
+ ],
2239
+ "source": [
2240
+ "df3"
2241
+ ]
2242
+ },
2243
+ {
2244
+ "cell_type": "markdown",
2245
+ "id": "db01b828-d1b1-4708-b6bd-3b2dbed54746",
2246
+ "metadata": {},
2247
+ "source": [
2248
+ "> Note that I updated these in the spreadsheet manually with Excel because it was faster to do it by hand... I should go back when I have time to do it programmatically..."
2249
+ ]
2250
+ }
2251
+ ],
2252
+ "metadata": {
2253
+ "kernelspec": {
2254
+ "display_name": "Python 3 (ipykernel)",
2255
+ "language": "python",
2256
+ "name": "python3"
2257
+ },
2258
+ "language_info": {
2259
+ "codemirror_mode": {
2260
+ "name": "ipython",
2261
+ "version": 3
2262
+ },
2263
+ "file_extension": ".py",
2264
+ "mimetype": "text/x-python",
2265
+ "name": "python",
2266
+ "nbconvert_exporter": "python",
2267
+ "pygments_lexer": "ipython3",
2268
+ "version": "3.8.8"
2269
+ }
2270
+ },
2271
+ "nbformat": 4,
2272
+ "nbformat_minor": 5
2273
+ }
Assets/Countries/.ipynb_checkpoints/combined-countries-checkpoint.csv ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,Country,year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,Negative affect,Continent
2
+ 0,Zimbabwe,2020,3.159802198410034,7.828756809234619,0.7172426581382751,56.79999923706055,0.6433029770851135,-0.00869576446712017,0.7885227799415588,0.702572762966156,0.34573638439178467,Africa
3
+ 1,Benin,2020,4.407745838165283,8.10229206085205,0.5066360831260681,55.099998474121094,0.7831146717071533,-0.08348871022462845,0.5318836569786072,0.6085846424102783,0.3045124411582947,Africa
4
+ 2,United Kingdom,2020,6.798177242279053,10.625810623168945,0.9293532371520996,72.69999694824219,0.8846240043640137,0.20250841975212097,0.49020394682884216,0.758163571357727,0.2246551215648651,Europe
5
+ 3,Philippines,2020,5.079585075378418,9.061443328857422,0.7811403870582581,62.099998474121094,0.9320417046546936,-0.11554288119077682,0.7442836761474609,0.8035621047019958,0.3268890082836151,Asia
6
+ 4,Iraq,2020,4.785165309906006,9.16718578338623,0.7078474760055542,61.400001525878906,0.7002145648002625,-0.020748287439346313,0.8491087555885315,0.6444642543792725,0.5315389037132263,Asia
7
+ 5,Belgium,2020,6.838760852813721,10.770537376403809,0.9035586714744568,72.4000015258789,0.7669178247451782,-0.16378448903560638,0.6336267590522766,0.6465103030204773,0.2601887881755829,Europe
8
+ 6,Iran,2020,4.864528179168701,,0.7572186589241028,66.5999984741211,0.5995944738388062,,0.7099016904830933,0.5824205279350281,0.47024500370025635,Asia
9
+ 7,Poland,2020,6.139455318450928,10.371203422546387,0.9531717300415039,70.0999984741211,0.7674286961555481,-0.006559355650097132,0.7868736386299133,0.759842574596405,0.32893791794776917,Europe
10
+ 8,Portugal,2020,5.767792224884033,10.370820045471191,0.8749903440475464,72.80000305175781,0.9131307601928711,-0.23809020221233368,0.8671571612358093,0.6477688550949097,0.3828126788139343,Europe
11
+ 9,India,2020,4.225281238555908,8.70277214050293,0.616639256477356,60.900001525878906,0.9063913226127625,0.07482379674911499,0.7801240086555481,0.7524339556694031,0.3831625282764435,Asia
12
+ 10,Israel,2020,7.194928169250488,10.538053512573242,0.9590721726417542,73.69999694824219,0.831315815448761,-0.04937167465686798,0.7476390600204468,0.6213983297348022,0.2428257316350937,Asia
13
+ 11,Iceland,2020,7.575489521026611,10.824200630187988,0.9832860827445984,73.0,0.9486271739006042,0.16027399897575378,0.6440638899803162,0.8630176186561584,0.17179514467716217,Europe
14
+ 12,United Arab Emirates,2020,6.458392143249512,11.052889823913574,0.8267555832862854,67.5,0.942161500453949,0.060019660741090775,,0.7516599297523499,0.2984803020954132,Asia
15
+ 13,Hungary,2020,6.038049697875977,10.335147857666016,0.9434003829956055,68.4000015258789,0.7709680795669556,-0.12040461599826813,0.8361051082611084,0.7352383732795715,0.24005194008350372,Europe
16
+ 14,Hong Kong S.A.R. of China,2020,5.295341491699219,,0.8129429817199707,,0.7054522633552551,,0.3803512156009674,0.608647346496582,0.210313618183136,
17
+ 15,Bolivia,2020,5.559258937835693,8.997989654541016,0.8048108816146851,64.19999694824219,0.8770319223403931,-0.05376378819346428,0.8682082891464233,0.7898184657096863,0.3817911744117737,South America
18
+ 16,Russia,2020,5.495288848876953,10.162235260009766,0.8870201706886292,65.0999984741211,0.7144664525985718,-0.07061229646205902,0.8230475187301636,0.6452149748802185,0.18952153623104095,
19
+ 17,Saudi Arabia,2020,6.559588432312012,10.700662612915039,0.8902559280395508,66.9000015258789,0.8842201232910156,-0.11053171008825302,,0.7536076307296753,0.25119906663894653,Asia
20
+ 18,Ireland,2020,7.03493070602417,11.322803497314453,0.9603110551834106,72.5,0.8820982575416565,0.013816552236676216,0.3556327223777771,0.7966610193252563,0.24644726514816284,Europe
21
+ 19,Italy,2020,6.488356113433838,10.56257152557373,0.8898240327835083,74.0,0.7181554436683655,-0.14993725717067719,0.8440945744514465,0.6702133417129517,0.3110021650791168,Europe
22
+ 20,Ukraine,2020,5.2696757316589355,9.427873611450195,0.884686291217804,65.19999694824219,0.7842734456062317,0.1263442039489746,0.9456689953804016,0.6877206563949585,0.28473618626594543,Europe
23
+ 21,Kenya,2020,4.546584129333496,8.36528205871582,0.6737176179885864,61.29999923706055,0.7020344734191895,0.2599695920944214,0.8365160226821899,0.7334348559379578,0.2969804108142853,Africa
24
+ 22,Latvia,2020,6.229008674621582,10.299590110778809,0.9280121922492981,67.4000015258789,0.8201116919517517,-0.077660471200943,0.808821976184845,0.7136284112930298,0.20158237218856812,Europe
25
+ 23,Laos,2020,5.284390926361084,8.959955215454102,0.6603962779045105,59.5,0.9150282144546509,0.14143069088459015,0.7479977011680603,0.8216802477836609,0.3583492636680603,Asia
26
+ 24,Nigeria,2020,5.50294828414917,8.484203338623047,0.7392894625663757,50.5,0.713061511516571,0.09940405935049057,0.9127744436264038,0.7439777255058289,0.31588682532310486,Africa
27
+ 25,Austria,2020,7.213489055633545,10.851118087768555,0.924831211566925,73.5999984741211,0.9119098782539368,0.01103174313902855,0.4638301730155945,0.7693166136741638,0.20649965107440948,Europe
28
+ 26,Kyrgyzstan,2020,6.24958610534668,8.503411293029785,0.9022229909896851,64.69999694824219,0.9348853230476379,0.10286574065685272,0.9313175082206726,0.8030253648757935,0.25781306624412537,Asia
29
+ 27,North Macedonia,2020,5.053664207458496,9.690014839172363,0.7503741979598999,65.55988311767578,0.7872847318649292,0.13127434253692627,0.8774211406707764,0.6046268343925476,0.3651260733604431,
30
+ 28,Kosovo,2020,6.294414043426514,,0.7923744916915894,,0.8798375725746155,,0.9098938703536987,0.7262398600578308,0.20145803689956665,
31
+ 29,Norway,2020,7.290032386779785,11.042160034179688,0.9559799432754517,73.4000015258789,0.9645611047744751,0.07514853775501251,0.2710832953453064,0.823093831539154,0.2160339206457138,Europe
32
+ 30,United States,2020,7.028088092803955,11.000656127929688,0.9373698234558105,68.0999984741211,0.8504472970962524,0.03410335257649422,0.6781246066093445,0.7873719930648804,0.2954990267753601,
33
+ 31,Kazakhstan,2020,6.168269157409668,10.135335922241211,0.966448962688446,65.80000305175781,0.8721001148223877,-0.056175168603658676,0.6607988476753235,0.6841026544570923,0.15035991370677948,Asia
34
+ 32,Bahrain,2020,6.173175811767578,10.619903564453125,0.8477450609207153,69.69999694824219,0.9452325701713562,0.13244104385375977,,0.7897949814796448,0.29683545231819153,Asia
35
+ 33,Uruguay,2020,6.309681415557861,9.9371919631958,0.9210703372955322,69.19999694824219,0.9077619314193726,-0.08398690074682236,0.49100783467292786,0.8073509335517883,0.2646920680999756,South America
36
+ 34,Jordan,2020,4.093991756439209,9.149994850158691,0.7088398933410645,67.19999694824219,0.7785334587097168,-0.14982588589191437,,,,Asia
37
+ 35,Japan,2020,6.1179633140563965,10.579547882080078,0.8872491121292114,75.19999694824219,0.806036114692688,-0.2587452828884125,0.6086985468864441,0.7424694299697876,0.18646100163459778,Asia
38
+ 36,Bangladesh,2020,5.27998685836792,8.47219467163086,0.7393379211425781,65.30000305175781,0.7774671912193298,-0.008851290680468082,0.7416591644287109,0.5823808312416077,0.33170878887176514,Asia
39
+ 37,Ivory Coast,2020,5.256503582000732,8.564923286437988,0.6131063103675842,50.70000076293945,0.7699980139732361,0.015563689172267914,0.7766872644424438,0.6926469206809998,0.3399190902709961,Africa
40
+ 38,Bosnia and Herzegovina,2020,5.5158162117004395,9.583344459533691,0.8985186815261841,68.4000015258789,0.740250825881958,0.13795417547225952,0.9160521626472473,0.6442373394966125,0.3254123032093048,Europe
41
+ 39,Greece,2020,5.787615776062012,10.214579582214355,0.7785365581512451,72.80000305175781,0.5646136403083801,-0.2408064603805542,0.7643245458602905,0.6844578385353088,0.32168421149253845,Europe
42
+ 40,Australia,2020,7.1373677253723145,10.75986385345459,0.9365170001983643,74.19999694824219,0.9052829742431641,0.21003030240535736,0.49109482765197754,0.7691817283630371,0.20507767796516418,Oceania
43
+ 41,Croatia,2020,6.507992267608643,10.165817260742188,0.9229134917259216,71.4000015258789,0.8366576433181763,-0.06296810507774353,0.9609392881393433,0.7427805066108704,0.28560975193977356,Europe
44
+ 42,Tunisia,2020,4.73081111907959,9.230624198913574,0.7190132141113281,67.5,0.6677581071853638,-0.20181423425674438,0.877354085445404,0.5846338868141174,0.43877434730529785,Africa
45
+ 43,Spain,2020,6.502175331115723,10.488059043884277,0.934934675693512,75.0,0.7832565307617188,-0.12061331421136856,0.7299774885177612,0.6861776113510132,0.31661710143089294,Europe
46
+ 44,Denmark,2020,7.514631271362305,10.909995079040527,0.9473713636398315,73.0,0.9379318356513977,0.05229302495718002,0.2138417512178421,0.8176636695861816,0.2271018922328949,Europe
47
+ 45,Cameroon,2020,5.241077899932861,8.174633979797363,0.7200466394424438,54.29999923706055,0.6745091676712036,0.049266181886196136,0.8365172147750854,0.6296146512031555,0.3864789605140686,Africa
48
+ 46,Czech Republic,2020,6.897091388702393,10.530134201049805,0.9640536904335022,71.30000305175781,0.9064220190048218,-0.1270223706960678,0.8836995959281921,0.8320576548576355,0.29044169187545776,
49
+ 47,Cyprus,2020,6.259810447692871,,0.8055593967437744,74.0999984741211,0.7627823352813721,,0.8162317276000977,0.7588630318641663,0.28352245688438416,Europe
50
+ 48,Sweden,2020,7.314341068267822,10.83790397644043,0.9355823397636414,72.80000305175781,0.9511815905570984,0.09081844985485077,0.20344014465808868,0.7663760781288147,0.2219332903623581,Europe
51
+ 49,Canada,2020,7.024904727935791,10.729514122009277,0.930610716342926,74.0,0.8868921995162964,0.049636855721473694,0.43401235342025757,0.7959487438201904,0.30667373538017273,North America
52
+ 50,South Korea,2020,5.79269552230835,10.64807415008545,0.8079522848129272,74.19999694824219,0.711480438709259,-0.1058678925037384,0.6646940112113953,0.6395556926727295,0.2470596581697464,
53
+ 51,Switzerland,2020,7.508435249328613,11.080892562866211,0.9463164806365967,74.69999694824219,0.917343258857727,-0.06350205838680267,0.2803671360015869,0.7687047123908997,0.19322898983955383,Europe
54
+ 52,Thailand,2020,5.884544372558594,9.769243240356445,0.8667026162147522,67.5999984741211,0.8404632806777954,0.2730555832386017,0.9183400273323059,0.7832698822021484,0.32616856694221497,Asia
55
+ 53,Taiwan Province of China,2020,6.751067638397217,,0.9008325338363647,,0.7988347411155701,,0.7105674147605896,0.8453933596611023,0.08273695409297943,
56
+ 54,Colombia,2020,5.709175109863281,9.495491027832031,0.7970352172851562,68.30000305175781,0.8401861190795898,-0.0846422091126442,0.807964026927948,0.7951326966285706,0.3401585817337036,South America
57
+ 55,Tajikistan,2020,5.373398780822754,8.08035659790039,0.7897445559501648,64.69999694824219,,-0.04046706482768059,0.5497864484786987,0.7488976120948792,0.3441612720489502,Asia
58
+ 56,Tanzania,2020,3.785684108734131,7.881270408630371,0.7398170828819275,58.5,0.83034348487854,0.29527199268341064,0.5206316709518433,0.6855331063270569,0.2711179256439209,Africa
59
+ 57,China,2020,5.771064758300781,9.701754570007324,0.808334469795227,69.9000015258789,0.8911229968070984,-0.1032143384218216,,0.789345383644104,0.24491822719573975,Asia
60
+ 58,Dominican Republic,2020,5.168409824371338,9.802446365356445,0.8061176538467407,66.4000015258789,0.8346429467201233,-0.1278340369462967,0.6361165642738342,0.7338669300079346,0.3139283061027527,North America
61
+ 59,Cambodia,2020,4.3769850730896,8.36193561553955,0.7244226336479187,62.400001525878906,0.9630754590034485,0.052429765462875366,0.8630539774894714,0.8779535293579102,0.3898516297340393,Asia
62
+ 60,Ghana,2020,5.319483280181885,8.589605331420898,0.6427033543586731,58.0,0.8237200379371643,0.19963206350803375,0.8470249176025391,0.7127659320831299,0.2527284324169159,Africa
63
+ 61,Slovakia,2020,6.519098281860352,10.331512451171875,0.9541599750518799,69.5,0.7618966102600098,-0.07487351447343826,0.9005336761474609,0.7635828852653503,0.27444788813591003,Europe
64
+ 62,Serbia,2020,6.04154634475708,9.788259506225586,0.8521018624305725,69.0,0.8434798717498779,0.14940130710601807,0.8244724869728088,0.6028461456298828,0.3575802743434906,Europe
65
+ 63,Uganda,2020,4.640909671783447,7.684450149536133,0.8004611730575562,56.5,0.6874821186065674,0.14711755514144897,0.8775872588157654,0.698948860168457,0.42470666766166687,Africa
66
+ 64,Germany,2020,7.3118977546691895,10.83349895477295,0.9050804972648621,72.80000305175781,0.8643560409545898,-0.06004804000258446,0.4240887761116028,0.7595943212509155,0.20592711865901947,Europe
67
+ 65,Georgia,2020,5.123143196105957,9.569304466247559,0.7183459401130676,64.0999984741211,0.7643523812294006,-0.22112546861171722,0.5827347040176392,0.6108949184417725,0.2945120632648468,Europe
68
+ 66,Brazil,2020,6.109717845916748,9.522140502929688,0.8308321237564087,66.80000305175781,0.7862350940704346,-0.05282001942396164,0.7287722229957581,0.6920238733291626,0.3891385495662689,South America
69
+ 67,France,2020,6.714111804962158,10.643280029296875,0.9473540186882019,74.19999694824219,0.8233863115310669,-0.16896052658557892,0.5646405816078186,0.731813907623291,0.23095043003559113,Europe
70
+ 68,Bulgaria,2020,5.597723007202148,9.990657806396484,0.9162423610687256,67.19999694824219,0.8182247877120972,-0.004322313703596592,0.9006329774856567,0.7058346271514893,0.22135105729103088,Europe
71
+ 69,Finland,2020,7.889349937438965,10.750446319580078,0.9616207480430603,72.0999984741211,0.9624236822128296,-0.11553198844194412,0.16363589465618134,0.7442921996116638,0.19289757311344147,Europe
72
+ 70,Ecuador,2020,5.354461669921875,9.243865013122559,0.8040085434913635,69.0999984741211,0.8285115361213684,-0.15709003806114197,0.8547804951667786,0.7899407148361206,0.4160279631614685,South America
73
+ 71,Ethiopia,2020,4.549219608306885,7.710982799530029,0.8231375813484192,59.5,0.768694281578064,0.18849685788154602,0.7838224172592163,0.6693886518478394,0.25151434540748596,Africa
74
+ 72,Slovenia,2020,6.462076187133789,10.477869987487793,0.9534375071525574,71.69999694824219,0.9584425687789917,-0.08135689049959183,0.7965574860572815,0.6099492311477661,0.3138525187969208,Europe
75
+ 73,Estonia,2020,6.452563762664795,10.458588600158691,0.9577704668045044,69.0,0.9542005658149719,-0.08227915316820145,0.39783477783203125,0.8069238066673279,0.1876794993877411,Europe
76
+ 74,El Salvador,2020,5.4619269371032715,9.018845558166504,0.6956243515014648,66.69999694824219,0.9239448308944702,-0.1264744997024536,0.5830363631248474,0.8389042019844055,0.32943978905677795,North America
77
+ 75,Turkey,2020,4.861554145812988,10.219083786010742,0.8567302227020264,67.5999984741211,0.5103858709335327,-0.11088898777961731,0.7744171619415283,0.38429245352745056,0.4403873085975647,Asia
78
+ 76,South Africa,2020,4.946800708770752,9.332463264465332,0.8910503387451172,57.29999923706055,0.7569462656974792,-0.014951311983168125,0.9124072194099426,0.8203377723693848,0.29427647590637207,Africa
79
+ 77,Egypt,2020,4.4723968505859375,9.382726669311523,0.6727254986763,62.29999923706055,0.7695503234863281,-0.1123419776558876,,0.5989086627960205,0.442033588886261,Africa
80
+ 78,Venezuela,2020,4.573829650878906,,0.8052242398262024,66.9000015258789,0.6118146181106567,,0.81131911277771,0.7223914265632629,0.396250456571579,South America
81
+ 79,Chile,2020,6.1506428718566895,10.0201416015625,0.8884122967720032,70.0999984741211,0.7813835740089417,0.03299075737595558,0.8118188381195068,0.8146027326583862,0.3360286056995392,South America
82
+ 80,Lithuania,2020,6.391378879547119,10.503606796264648,0.952544093132019,68.5,0.8240605592727661,-0.12178131192922592,0.829204797744751,0.6602295637130737,0.20191200077533722,Europe
83
+ 81,Moldova,2020,5.811628818511963,9.462109565734863,0.8740617632865906,66.4000015258789,0.8590832352638245,-0.05827857926487923,0.9414389729499817,0.7272245287895203,0.2678360641002655,Europe
84
+ 82,Netherlands,2020,7.504447937011719,10.900500297546387,0.9439561367034912,72.5,0.9345226287841797,0.15129804611206055,0.2806045114994049,0.7839906215667725,0.2465113252401352,Europe
85
+ 83,Mongolia,2020,6.011364936828613,9.395559310913086,0.9177891612052917,62.70000076293945,0.7184910178184509,0.1413574516773224,0.8428276777267456,0.6364434957504272,0.25998303294181824,Asia
86
+ 84,Mauritius,2020,6.015300273895264,9.972017288208008,0.8925659656524658,67.0,0.8425980806350708,-0.03669271990656853,0.771790087223053,0.7669844627380371,0.1384017914533615,Africa
87
+ 85,Mexico,2020,5.964221000671387,9.78218936920166,0.7788162231445312,68.9000015258789,0.8733469843864441,-0.1193898618221283,0.778165876865387,0.8101091384887695,0.29155611991882324,North America
88
+ 86,New Zealand,2020,7.257381916046143,10.600457191467285,0.9519907832145691,73.5999984741211,0.9181545972824097,0.1252596527338028,0.2827679514884949,0.8494150042533875,0.20854105055332184,Oceania
89
+ 87,Namibia,2020,4.451010227203369,9.10413932800293,0.7405703067779541,57.099998474121094,0.6656819581985474,-0.10388018190860748,0.8103548288345337,0.6479195356369019,0.24754208326339722,Africa
90
+ 88,Myanmar,2020,4.431364059448242,8.553914070129395,0.7957632541656494,59.599998474121094,0.8248707056045532,0.4702581763267517,0.6467021107673645,0.7997491955757141,0.2892182171344757,
91
+ 89,Malta,2020,6.156822681427002,,0.9379202723503113,72.19999694824219,0.9306004643440247,,0.674626350402832,0.6014958620071411,0.41091322898864746,Europe
92
+ 90,Zambia,2020,4.837992191314697,8.11658000946045,0.7668716311454773,56.29999923706055,0.7504224181175232,0.056029193103313446,0.8097497820854187,0.691082239151001,0.34452593326568604,Africa
93
+ 91,Argentina,2020,5.900567054748535,9.850449562072754,0.8971038460731506,69.19999694824219,0.8233916163444519,-0.12235432863235474,0.8157804608345032,0.7635238766670227,0.34249693155288696,South America
94
+ 92,Morocco,2020,4.80261754989624,8.870917320251465,0.5525200963020325,66.5,0.8189952373504639,-0.22857755422592163,0.8027402758598328,0.5871824026107788,0.2564311921596527,Africa
95
+ 93,Albania,2020,5.364909648895264,9.497251510620117,0.7101150155067444,69.30000305175781,0.7536710500717163,0.006968025118112564,0.8913589715957642,0.6786612272262573,0.26506611704826355,Europe
96
+ 94,Montenegro,2020,5.72216272354126,9.912668228149414,0.8871294856071472,68.9000015258789,0.8018550872802734,0.059815771877765656,0.8446871042251587,0.6032826900482178,0.41137781739234924,Europe
97
+ 95,Guinea,2019,4.767684459686279,7.849340438842773,0.6551241874694824,55.5,0.691399097442627,0.09681724011898041,0.7555854916572571,0.6846469044685364,0.4733884334564209,Africa
98
+ 96,Yemen,2019,4.19691276550293,,0.8700428009033203,57.5,0.6513082385063171,,0.7982282638549805,0.5428059101104736,0.2130432277917862,Asia
99
+ 97,Guatemala,2019,6.2621750831604,9.063875198364258,0.774074375629425,65.0999984741211,0.9006763100624084,-0.06230298802256584,0.7725779414176941,0.859412670135498,0.3107892572879791,North America
100
+ 98,Malaysia,2019,5.427954196929932,10.252403259277344,0.8424988389015198,67.19999694824219,0.9157786965370178,0.12332413345575333,0.7819439172744751,0.8341774940490723,0.17607168853282928,Asia
101
+ 99,Rwanda,2019,3.2681522369384766,7.7080607414245605,0.48945823311805725,61.70000076293945,0.868999183177948,0.06406588107347488,0.16797089576721191,0.7360679507255554,0.4176676869392395,Africa
102
+ 100,Sri Lanka,2019,4.21329927444458,9.478693962097168,0.8149391412734985,67.4000015258789,0.8242773413658142,0.051186613738536835,0.86334228515625,0.8163903951644897,0.3145427107810974,Asia
103
+ 101,Malawi,2019,3.869123697280884,6.965763092041016,0.5489560961723328,58.29999923706055,0.7648642063140869,0.003596819471567869,0.680247962474823,0.5366970300674438,0.348162442445755,Africa
104
+ 102,Nepal,2019,5.448724746704102,8.136457443237305,0.772273063659668,64.5999984741211,0.790347695350647,0.16697579622268677,0.7118424773216248,0.5357981324195862,0.35710030794143677,Asia
105
+ 103,Swaziland,2019,4.396114826202393,9.069709777832031,0.759097695350647,51.27039337158203,0.5966824293136597,-0.19073791801929474,0.7235077619552612,0.7776272892951965,0.27959516644477844,Africa
106
+ 104,Romania,2019,6.129942417144775,10.305913925170898,0.841905951499939,67.5,0.8475431799888611,-0.22142210602760315,0.9541307091712952,0.6974433660507202,0.24365922808647156,Europe
107
+ 105,Senegal,2019,5.488736629486084,8.130020141601562,0.6876140832901001,60.0,0.7588417530059814,-0.01880391500890255,0.7956734299659729,0.7889730334281921,0.3319258391857147,Africa
108
+ 106,Honduras,2019,5.930051326751709,8.653117179870605,0.7971483469009399,67.4000015258789,0.8461900353431702,0.06270892173051834,0.8149629235267639,0.8499549627304077,0.27888208627700806,North America
109
+ 107,Mali,2019,4.987991809844971,7.752494812011719,0.7545580863952637,52.20000076293945,0.6704050898551941,-0.03785175830125809,0.846340000629425,0.7115226984024048,0.35776451230049133,Africa
110
+ 108,Mauritania,2019,4.152619361877441,8.555842399597168,0.7981019616127014,57.29999923706055,0.6275051832199097,-0.10185665637254715,0.7428902983665466,0.6918314695358276,0.2597385048866272,Africa
111
+ 109,Turkmenistan,2019,5.474299907684326,9.65118408203125,0.9815017580986023,62.599998474121094,0.8915268778800964,0.2848806381225586,,0.5099145174026489,0.18334324657917023,Asia
112
+ 110,Burkina Faso,2019,4.7408928871154785,7.691488265991211,0.6831023693084717,54.400001525878906,0.6775468587875366,-0.004089894238859415,0.7293965816497803,0.6909258961677551,0.3647753894329071,
113
+ 111,Algeria,2019,4.744627475738525,9.336946487426758,0.8032586574554443,66.0999984741211,0.3850834369659424,0.005086520221084356,0.740609347820282,0.5849443078041077,0.21519775688648224,Africa
114
+ 112,Botswana,2019,3.4710848331451416,9.785069465637207,0.7736672163009644,59.599998474121094,0.8325426578521729,-0.23900093138217926,0.792079508304596,0.7117963433265686,0.2727217674255371,Africa
115
+ 113,Sierra Leone,2019,3.4473814964294434,7.449131965637207,0.6107797622680664,52.400001525878906,0.7177695631980896,0.07405570149421692,0.8738614320755005,0.5133752226829529,0.43813446164131165,Africa
116
+ 114,Mozambique,2019,4.932132720947266,7.154966831207275,0.742303729057312,55.20000076293945,0.8698102235794067,0.07274501770734787,0.6819004416465759,0.5872747302055359,0.384122759103775,Africa
117
+ 115,Singapore,2019,6.378359794616699,11.485980033874512,0.9249183535575867,77.0999984741211,0.9380417466163635,0.027229677885770798,0.06961960345506668,0.7225980162620544,0.13806915283203125,Asia
118
+ 116,Gambia,2019,5.1636271476745605,7.699349880218506,0.6938701272010803,55.29999923706055,0.6765952706336975,0.4101804792881012,0.7981081008911133,0.7728161811828613,0.40072327852249146,Africa
119
+ 117,Gabon,2019,4.914393424987793,9.607087135314941,0.7630516886711121,60.20000076293945,0.736349880695343,-0.20251981914043427,0.8462542295455933,0.6927024126052856,0.4129609763622284,Africa
120
+ 118,Indonesia,2019,5.346512794494629,9.376888275146484,0.8019180297851562,62.29999923706055,0.8658591508865356,0.5553480386734009,0.8607847690582275,0.8767140507698059,0.3017027974128723,Asia
121
+ 119,Azerbaijan,2019,5.173389434814453,9.575250625610352,0.886756420135498,65.80000305175781,0.8542485237121582,-0.2141629159450531,0.4572606682777405,0.6425468325614929,0.16392025351524353,Europe
122
+ 120,Chad,2019,4.250799179077148,7.364943981170654,0.6404520869255066,48.70000076293945,0.5372456908226013,0.05500093847513199,0.8322834968566895,0.5872111916542053,0.46006128191947937,Africa
123
+ 121,Liberia,2019,5.121460914611816,7.263903617858887,0.7124737501144409,56.900001525878906,0.7058745622634888,0.050611626356840134,0.8284689784049988,0.635608971118927,0.3891325891017914,Africa
124
+ 122,Libya,2019,5.330222129821777,9.627349853515625,0.826719343662262,62.29999923706055,0.7619643211364746,-0.07267285138368607,0.6864129900932312,0.7087408900260925,0.4007374346256256,Africa
125
+ 123,Pakistan,2019,4.442717552185059,8.453290939331055,0.6172957420349121,58.900001525878906,0.6846755743026733,0.12372947484254837,0.775998055934906,0.5810673832893372,0.4242400825023651,Asia
126
+ 124,Armenia,2019,5.488086700439453,9.521769523620605,0.7816038727760315,67.19999694824219,0.8443241119384766,-0.17236898839473724,0.583472728729248,0.5982378125190735,0.43046340346336365,Europe
127
+ 125,Comoros,2019,4.608616352081299,8.033134460449219,0.6320129632949829,57.5,0.5382615327835083,0.0772530809044838,0.7622324824333191,0.7362217307090759,0.33616289496421814,Africa
128
+ 126,Afghanistan,2019,2.375091791152954,7.6972479820251465,0.41997286677360535,52.400001525878906,0.3936561644077301,-0.10845886915922165,0.9238491058349609,0.35138705372810364,0.5024737119674683,Asia
129
+ 127,Palestinian Territories,2019,4.482537269592285,,0.832550048828125,,0.653488278388977,,0.8292827606201172,0.6251764297485352,0.3996722996234894,
130
+ 128,Nicaragua,2019,6.112545013427734,8.59546947479248,0.873863935470581,67.80000305175781,0.8826784491539001,0.029247265309095383,0.6219817399978638,0.835423469543457,0.33701297640800476,North America
131
+ 129,Niger,2019,5.003544330596924,7.105849266052246,0.6769587397575378,54.0,0.8313618898391724,0.025959890335798264,0.7288551330566406,0.8159151673316956,0.3044382631778717,Africa
132
+ 130,Lebanon,2019,4.024219512939453,9.596782684326172,0.8659685254096985,67.5999984741211,0.44700148701667786,-0.08108239620923996,0.890415608882904,0.32168975472450256,0.4944990277290344,Asia
133
+ 131,Lesotho,2019,3.5117805004119873,7.925776958465576,0.7897053956985474,48.70000076293945,0.7163135409355164,-0.13053622841835022,0.9149514436721802,0.7348799109458923,0.27342551946640015,Africa
134
+ 132,Uzbekistan,2019,6.154049396514893,8.853480339050293,0.9152759313583374,65.4000015258789,0.9702945351600647,0.3042975962162018,0.5111968517303467,0.8448085188865662,0.21974551677703857,Asia
135
+ 133,North Cyprus,2019,5.466615200042725,,0.8032945394515991,,0.7927346229553223,,0.6400588750839233,0.49369287490844727,0.2964111268520355,
136
+ 134,Kuwait,2019,6.106119632720947,10.816696166992188,0.8415197730064392,66.9000015258789,0.8672738075256348,-0.10416107624769211,,0.6953627467155457,0.3028763234615326,Asia
137
+ 135,Congo (Brazzaville),2019,5.21262264251709,8.101092338562012,0.624768078327179,58.5,0.6864519715309143,-0.04605123773217201,0.740589439868927,0.6452539563179016,0.40504083037376404,
138
+ 136,Peru,2019,5.9993815422058105,9.46093463897705,0.8090759515762329,68.4000015258789,0.8148059248924255,-0.1297357827425003,0.8736019134521484,0.820448100566864,0.3749854862689972,South America
139
+ 137,Vietnam,2019,5.467451095581055,8.992330551147461,0.8475921154022217,68.0999984741211,0.9524691700935364,-0.12553076446056366,0.7878892421722412,0.7511599063873291,0.18561019003391266,Asia
140
+ 138,Togo,2019,4.1794939041137695,7.375211238861084,0.5387021899223328,55.099998474121094,0.6174197793006897,0.06477482616901398,0.7366750240325928,0.5902292728424072,0.4438698887825012,Africa
141
+ 139,Belarus,2019,5.821453094482422,9.860038757324219,0.9167404770851135,66.4000015258789,0.656933605670929,-0.18593330681324005,0.5459047555923462,0.5908505916595459,0.18982140719890594,Europe
142
+ 140,Madagascar,2019,4.33908748626709,7.4062371253967285,0.7006101012229919,59.5,0.5495352149009705,-0.012468654662370682,0.7199826836585999,0.7231946587562561,0.3039596676826477,Africa
143
+ 141,Costa Rica,2019,6.997618675231934,9.885446548461914,0.9060774445533752,71.5,0.9268301129341125,-0.14599433541297913,0.83562833070755,0.8483476042747498,0.3033272325992584,North America
144
+ 142,Luxembourg,2019,7.40401554107666,11.648168563842773,0.9121045470237732,72.5999984741211,0.930321216583252,-0.04505761340260506,0.38959842920303345,0.7891863584518433,0.21163980662822723,Europe
145
+ 143,Panama,2019,6.0859551429748535,10.356431007385254,0.8857213854789734,69.69999694824219,0.882961094379425,-0.1989849954843521,0.8688275218009949,0.877561628818512,0.2435666024684906,North America
146
+ 144,Paraguay,2019,5.652625560760498,9.44814395904541,0.8924871683120728,65.9000015258789,0.8760526180267334,0.02811283804476261,0.8817861080169678,0.857724130153656,0.2751867175102234,South America
147
+ 145,Jamaica,2019,6.309238910675049,9.186201095581055,0.8778144717216492,67.5,0.8906708359718323,-0.13679705560207367,0.8853300213813782,0.7520411014556885,0.1952841430902481,North America
148
+ 146,Maldives,2018,5.197574615478516,9.8259859085083,0.9133150577545166,70.5999984741211,0.8547592759132385,0.0239978339523077,,,,Asia
149
+ 147,Haiti,2018,3.6149280071258545,7.477138042449951,0.5379759073257446,55.70000076293945,0.5914683938026428,0.4215203523635864,0.7204447388648987,0.5841132998466492,0.3587200343608856,North America
150
+ 148,Burundi,2018,3.775283098220825,6.635322093963623,0.48471522331237793,53.400001525878906,0.6463986039161682,-0.023876165971159935,0.5986076593399048,0.6664415001869202,0.3627665936946869,Africa
151
+ 149,Congo (Kinshasa),2017,4.311033248901367,6.965845584869385,0.6696884036064148,52.900001525878906,0.704239547252655,0.06837817281484604,0.8091818690299988,0.5505259037017822,0.40426206588745117,
152
+ 150,Central African Republic,2017,3.4758620262145996,6.816519260406494,0.31958913803100586,45.20000076293945,0.6452523469924927,0.07278610020875931,0.8895660042762756,0.6138651967048645,0.5993354916572571,Africa
153
+ 151,Trinidad and Tobago,2017,6.191859722137451,10.182920455932617,0.9160290360450745,63.5,0.8591404557228088,0.014855396002531052,0.911336362361908,0.8464670777320862,0.24809880554676056,North America
154
+ 152,South Sudan,2017,2.816622495651245,,0.556822657585144,51.0,0.4560110867023468,,0.7612696290016174,0.5856021642684937,0.5173637866973877,Africa
155
+ 153,Somalia,2016,4.667941093444824,,0.5944165587425232,50.0,0.9173228144645691,,0.440801739692688,0.8914231657981873,0.19328223168849945,Africa
156
+ 154,Syria,2015,3.4619128704071045,8.441536903381348,0.46391287446022034,55.20000076293945,0.44827085733413696,0.044834915548563004,0.685236930847168,0.36943960189819336,0.64258873462677,Asia
157
+ 155,Qatar,2015,6.3745293617248535,11.485614776611328,,68.30000305175781,,,,,,Asia
158
+ 156,Bhutan,2015,5.082128524780273,9.218923568725586,0.8475744128227234,60.20000076293945,0.8301015496253967,0.2774123549461365,0.6339557766914368,0.8096414804458618,0.3115893006324768,Asia
159
+ 157,Sudan,2014,4.138672828674316,8.317068099975586,0.8106155395507812,55.119998931884766,0.3900958001613617,-0.06339464336633682,0.793785035610199,0.5408450365066528,0.3027249872684479,Africa
160
+ 158,Angola,2014,3.7948379516601562,9.016735076904297,0.7546154856681824,54.599998474121094,0.3745415508747101,-0.167722687125206,0.8340756297111511,0.5785171389579773,0.36786413192749023,Africa
161
+ 159,Belize,2014,5.955646514892578,8.883127212524414,0.7569324970245361,62.220001220703125,0.8735690712928772,0.021995628252625465,0.7821053862571716,0.7549773454666138,0.2816044092178345,North America
162
+ 160,Suriname,2012,6.269286632537842,9.79708480834961,0.7972620725631714,62.2400016784668,0.8854884505271912,-0.07717316597700119,0.7512828707695007,0.7642226815223694,0.2503649890422821,South America
163
+ 161,Somaliland region,2012,5.057314395904541,,0.786291241645813,,0.7582190036773682,,0.3338317275047302,0.7351891398429871,0.15242822468280792,
164
+ 162,Oman,2011,6.852982044219971,10.382461547851562,,65.5,0.9162930250167847,0.02490849234163761,,,0.2951641082763672,Asia
165
+ 163,Djibouti,2011,4.3691935539245605,7.880099296569824,0.6329732537269592,54.70000076293945,0.7464394569396973,-0.05731891468167305,0.5189301371574402,0.5793028473854065,0.1805926263332367,Africa
166
+ 164,Guyana,2007,5.992826461791992,8.77328872680664,0.8487651944160461,57.2599983215332,0.6940056681632996,0.11003703624010086,0.8355690836906433,0.7675405740737915,0.29641976952552795,South America
167
+ 165,Cuba,2006,5.417868614196777,,0.9695951342582703,68.44000244140625,0.28145793080329895,,,0.6467117667198181,0.27660152316093445,North America
Assets/Countries/.ipynb_checkpoints/countries-checkpoint.csv ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Country,Continent
2
+ Algeria,Africa
3
+ Angola,Africa
4
+ Benin,Africa
5
+ Botswana,Africa
6
+ Burkina,Africa
7
+ Burundi,Africa
8
+ Cameroon,Africa
9
+ Cape Verde,Africa
10
+ Central African Republic,Africa
11
+ Chad,Africa
12
+ Comoros,Africa
13
+ Congo,Africa
14
+ "Congo, Democratic Republic of",Africa
15
+ Djibouti,Africa
16
+ Egypt,Africa
17
+ Equatorial Guinea,Africa
18
+ Eritrea,Africa
19
+ Ethiopia,Africa
20
+ Gabon,Africa
21
+ Gambia,Africa
22
+ Ghana,Africa
23
+ Guinea,Africa
24
+ Guinea-Bissau,Africa
25
+ Ivory Coast,Africa
26
+ Kenya,Africa
27
+ Lesotho,Africa
28
+ Liberia,Africa
29
+ Libya,Africa
30
+ Madagascar,Africa
31
+ Malawi,Africa
32
+ Mali,Africa
33
+ Mauritania,Africa
34
+ Mauritius,Africa
35
+ Morocco,Africa
36
+ Mozambique,Africa
37
+ Namibia,Africa
38
+ Niger,Africa
39
+ Nigeria,Africa
40
+ Rwanda,Africa
41
+ Sao Tome and Principe,Africa
42
+ Senegal,Africa
43
+ Seychelles,Africa
44
+ Sierra Leone,Africa
45
+ Somalia,Africa
46
+ South Africa,Africa
47
+ South Sudan,Africa
48
+ Sudan,Africa
49
+ Swaziland,Africa
50
+ Tanzania,Africa
51
+ Togo,Africa
52
+ Tunisia,Africa
53
+ Uganda,Africa
54
+ Zambia,Africa
55
+ Zimbabwe,Africa
56
+ Afghanistan,Asia
57
+ Bahrain,Asia
58
+ Bangladesh,Asia
59
+ Bhutan,Asia
60
+ Brunei,Asia
61
+ Burma (Myanmar),Asia
62
+ Cambodia,Asia
63
+ China,Asia
64
+ East Timor,Asia
65
+ India,Asia
66
+ Indonesia,Asia
67
+ Iran,Asia
68
+ Iraq,Asia
69
+ Israel,Asia
70
+ Japan,Asia
71
+ Jordan,Asia
72
+ Kazakhstan,Asia
73
+ "Korea, North",Asia
74
+ "Korea, South",Asia
75
+ Kuwait,Asia
76
+ Kyrgyzstan,Asia
77
+ Laos,Asia
78
+ Lebanon,Asia
79
+ Malaysia,Asia
80
+ Maldives,Asia
81
+ Mongolia,Asia
82
+ Nepal,Asia
83
+ Oman,Asia
84
+ Pakistan,Asia
85
+ Philippines,Asia
86
+ Qatar,Asia
87
+ Russian Federation,Asia
88
+ Saudi Arabia,Asia
89
+ Singapore,Asia
90
+ Sri Lanka,Asia
91
+ Syria,Asia
92
+ Tajikistan,Asia
93
+ Thailand,Asia
94
+ Turkey,Asia
95
+ Turkmenistan,Asia
96
+ United Arab Emirates,Asia
97
+ Uzbekistan,Asia
98
+ Vietnam,Asia
99
+ Yemen,Asia
100
+ Albania,Europe
101
+ Andorra,Europe
102
+ Armenia,Europe
103
+ Austria,Europe
104
+ Azerbaijan,Europe
105
+ Belarus,Europe
106
+ Belgium,Europe
107
+ Bosnia and Herzegovina,Europe
108
+ Bulgaria,Europe
109
+ Croatia,Europe
110
+ Cyprus,Europe
111
+ CZ,Europe
112
+ Denmark,Europe
113
+ Estonia,Europe
114
+ Finland,Europe
115
+ France,Europe
116
+ Georgia,Europe
117
+ Germany,Europe
118
+ Greece,Europe
119
+ Hungary,Europe
120
+ Iceland,Europe
121
+ Ireland,Europe
122
+ Italy,Europe
123
+ Latvia,Europe
124
+ Liechtenstein,Europe
125
+ Lithuania,Europe
126
+ Luxembourg,Europe
127
+ Macedonia,Europe
128
+ Malta,Europe
129
+ Moldova,Europe
130
+ Monaco,Europe
131
+ Montenegro,Europe
132
+ Netherlands,Europe
133
+ Norway,Europe
134
+ Poland,Europe
135
+ Portugal,Europe
136
+ Romania,Europe
137
+ San Marino,Europe
138
+ Serbia,Europe
139
+ Slovakia,Europe
140
+ Slovenia,Europe
141
+ Spain,Europe
142
+ Sweden,Europe
143
+ Switzerland,Europe
144
+ Ukraine,Europe
145
+ United Kingdom,Europe
146
+ Vatican City,Europe
147
+ Antigua and Barbuda,North America
148
+ Bahamas,North America
149
+ Barbados,North America
150
+ Belize,North America
151
+ Canada,North America
152
+ Costa Rica,North America
153
+ Cuba,North America
154
+ Dominica,North America
155
+ Dominican Republic,North America
156
+ El Salvador,North America
157
+ Grenada,North America
158
+ Guatemala,North America
159
+ Haiti,North America
160
+ Honduras,North America
161
+ Jamaica,North America
162
+ Mexico,North America
163
+ Nicaragua,North America
164
+ Panama,North America
165
+ Saint Kitts and Nevis,North America
166
+ Saint Lucia,North America
167
+ Saint Vincent and the Grenadines,North America
168
+ Trinidad and Tobago,North America
169
+ US,North America
170
+ Australia,Oceania
171
+ Fiji,Oceania
172
+ Kiribati,Oceania
173
+ Marshall Islands,Oceania
174
+ Micronesia,Oceania
175
+ Nauru,Oceania
176
+ New Zealand,Oceania
177
+ Palau,Oceania
178
+ Papua New Guinea,Oceania
179
+ Samoa,Oceania
180
+ Solomon Islands,Oceania
181
+ Tonga,Oceania
182
+ Tuvalu,Oceania
183
+ Vanuatu,Oceania
184
+ Argentina,South America
185
+ Bolivia,South America
186
+ Brazil,South America
187
+ Chile,South America
188
+ Colombia,South America
189
+ Ecuador,South America
190
+ Guyana,South America
191
+ Paraguay,South America
192
+ Peru,South America
193
+ Suriname,South America
194
+ Uruguay,South America
195
+ Venezuela,South America
Assets/Countries/Country-Data-Origin.md ADDED
@@ -0,0 +1,4 @@
 
 
 
 
1
+ # Origin of the country data used in this project
2
+
3
+ I started by getting a list of countries on GitHub, from [
4
+ Daina Bouquin](https://github.com/dbouquin/IS_608/blob/master/NanosatDB_munging/Countries-Continents.csv), because it seemed relatively complete and contained continents. Then I started to think about secondary data that might be useful for exposing the bias in an algorithm and opted for the [World Happiness Report 2021](https://worldhappiness.report/ed/2021/#appendices-and-data). I added the continents to the countries in that file to ensure I could retain the initial categorization I used.
Assets/Countries/DataPanelWHR2021C2.xls ADDED
Binary file (434 kB). View file
Assets/Countries/clean-countries.ipynb ADDED
@@ -0,0 +1,2273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "daf46b53-319f-4973-9bb6-664135dd328e",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import pandas as pd, spacy, nltk, numpy as np, re, ssl"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 56,
16
+ "id": "3cae7a11-7696-40fc-967e-7ecafcb2b0da",
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "df = pd.read_excel(\"Assets/Countries/DataPanelWHR2021C2.xls\")"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": 57,
26
+ "id": "c1ebf3f3-1d38-4919-b60a-dc15e7bf907b",
27
+ "metadata": {},
28
+ "outputs": [
29
+ {
30
+ "data": {
31
+ "text/html": [
32
+ "<div>\n",
33
+ "<style scoped>\n",
34
+ " .dataframe tbody tr th:only-of-type {\n",
35
+ " vertical-align: middle;\n",
36
+ " }\n",
37
+ "\n",
38
+ " .dataframe tbody tr th {\n",
39
+ " vertical-align: top;\n",
40
+ " }\n",
41
+ "\n",
42
+ " .dataframe thead th {\n",
43
+ " text-align: right;\n",
44
+ " }\n",
45
+ "</style>\n",
46
+ "<table border=\"1\" class=\"dataframe\">\n",
47
+ " <thead>\n",
48
+ " <tr style=\"text-align: right;\">\n",
49
+ " <th></th>\n",
50
+ " <th>Country</th>\n",
51
+ " <th>year</th>\n",
52
+ " <th>Life Ladder</th>\n",
53
+ " <th>Log GDP per capita</th>\n",
54
+ " <th>Social support</th>\n",
55
+ " <th>Healthy life expectancy at birth</th>\n",
56
+ " <th>Freedom to make life choices</th>\n",
57
+ " <th>Generosity</th>\n",
58
+ " <th>Perceptions of corruption</th>\n",
59
+ " <th>Positive affect</th>\n",
60
+ " <th>Negative affect</th>\n",
61
+ " </tr>\n",
62
+ " </thead>\n",
63
+ " <tbody>\n",
64
+ " <tr>\n",
65
+ " <th>0</th>\n",
66
+ " <td>Afghanistan</td>\n",
67
+ " <td>2008</td>\n",
68
+ " <td>3.723590</td>\n",
69
+ " <td>7.370100</td>\n",
70
+ " <td>0.450662</td>\n",
71
+ " <td>50.799999</td>\n",
72
+ " <td>0.718114</td>\n",
73
+ " <td>0.167640</td>\n",
74
+ " <td>0.881686</td>\n",
75
+ " <td>0.517637</td>\n",
76
+ " <td>0.258195</td>\n",
77
+ " </tr>\n",
78
+ " <tr>\n",
79
+ " <th>1</th>\n",
80
+ " <td>Afghanistan</td>\n",
81
+ " <td>2009</td>\n",
82
+ " <td>4.401778</td>\n",
83
+ " <td>7.539972</td>\n",
84
+ " <td>0.552308</td>\n",
85
+ " <td>51.200001</td>\n",
86
+ " <td>0.678896</td>\n",
87
+ " <td>0.190099</td>\n",
88
+ " <td>0.850035</td>\n",
89
+ " <td>0.583926</td>\n",
90
+ " <td>0.237092</td>\n",
91
+ " </tr>\n",
92
+ " <tr>\n",
93
+ " <th>2</th>\n",
94
+ " <td>Afghanistan</td>\n",
95
+ " <td>2010</td>\n",
96
+ " <td>4.758381</td>\n",
97
+ " <td>7.646709</td>\n",
98
+ " <td>0.539075</td>\n",
99
+ " <td>51.599998</td>\n",
100
+ " <td>0.600127</td>\n",
101
+ " <td>0.120590</td>\n",
102
+ " <td>0.706766</td>\n",
103
+ " <td>0.618265</td>\n",
104
+ " <td>0.275324</td>\n",
105
+ " </tr>\n",
106
+ " <tr>\n",
107
+ " <th>3</th>\n",
108
+ " <td>Afghanistan</td>\n",
109
+ " <td>2011</td>\n",
110
+ " <td>3.831719</td>\n",
111
+ " <td>7.619532</td>\n",
112
+ " <td>0.521104</td>\n",
113
+ " <td>51.919998</td>\n",
114
+ " <td>0.495901</td>\n",
115
+ " <td>0.162427</td>\n",
116
+ " <td>0.731109</td>\n",
117
+ " <td>0.611387</td>\n",
118
+ " <td>0.267175</td>\n",
119
+ " </tr>\n",
120
+ " <tr>\n",
121
+ " <th>4</th>\n",
122
+ " <td>Afghanistan</td>\n",
123
+ " <td>2012</td>\n",
124
+ " <td>3.782938</td>\n",
125
+ " <td>7.705479</td>\n",
126
+ " <td>0.520637</td>\n",
127
+ " <td>52.240002</td>\n",
128
+ " <td>0.530935</td>\n",
129
+ " <td>0.236032</td>\n",
130
+ " <td>0.775620</td>\n",
131
+ " <td>0.710385</td>\n",
132
+ " <td>0.267919</td>\n",
133
+ " </tr>\n",
134
+ " </tbody>\n",
135
+ "</table>\n",
136
+ "</div>"
137
+ ],
138
+ "text/plain": [
139
+ " Country year Life Ladder Log GDP per capita Social support \\\n",
140
+ "0 Afghanistan 2008 3.723590 7.370100 0.450662 \n",
141
+ "1 Afghanistan 2009 4.401778 7.539972 0.552308 \n",
142
+ "2 Afghanistan 2010 4.758381 7.646709 0.539075 \n",
143
+ "3 Afghanistan 2011 3.831719 7.619532 0.521104 \n",
144
+ "4 Afghanistan 2012 3.782938 7.705479 0.520637 \n",
145
+ "\n",
146
+ " Healthy life expectancy at birth Freedom to make life choices Generosity \\\n",
147
+ "0 50.799999 0.718114 0.167640 \n",
148
+ "1 51.200001 0.678896 0.190099 \n",
149
+ "2 51.599998 0.600127 0.120590 \n",
150
+ "3 51.919998 0.495901 0.162427 \n",
151
+ "4 52.240002 0.530935 0.236032 \n",
152
+ "\n",
153
+ " Perceptions of corruption Positive affect Negative affect \n",
154
+ "0 0.881686 0.517637 0.258195 \n",
155
+ "1 0.850035 0.583926 0.237092 \n",
156
+ "2 0.706766 0.618265 0.275324 \n",
157
+ "3 0.731109 0.611387 0.267175 \n",
158
+ "4 0.775620 0.710385 0.267919 "
159
+ ]
160
+ },
161
+ "execution_count": 57,
162
+ "metadata": {},
163
+ "output_type": "execute_result"
164
+ }
165
+ ],
166
+ "source": [
167
+ "df.head()"
168
+ ]
169
+ },
170
+ {
171
+ "cell_type": "code",
172
+ "execution_count": 59,
173
+ "id": "a1d054e6-8ca7-4675-913e-b0b500afe105",
174
+ "metadata": {},
175
+ "outputs": [],
176
+ "source": [
177
+ "df_sorted = df.sort_values(by=['year'], ascending = False)"
178
+ ]
179
+ },
180
+ {
181
+ "cell_type": "code",
182
+ "execution_count": 60,
183
+ "id": "42d08d97-fa68-40dc-9cfd-b0aa8acbb838",
184
+ "metadata": {},
185
+ "outputs": [
186
+ {
187
+ "data": {
188
+ "text/html": [
189
+ "<div>\n",
190
+ "<style scoped>\n",
191
+ " .dataframe tbody tr th:only-of-type {\n",
192
+ " vertical-align: middle;\n",
193
+ " }\n",
194
+ "\n",
195
+ " .dataframe tbody tr th {\n",
196
+ " vertical-align: top;\n",
197
+ " }\n",
198
+ "\n",
199
+ " .dataframe thead th {\n",
200
+ " text-align: right;\n",
201
+ " }\n",
202
+ "</style>\n",
203
+ "<table border=\"1\" class=\"dataframe\">\n",
204
+ " <thead>\n",
205
+ " <tr style=\"text-align: right;\">\n",
206
+ " <th></th>\n",
207
+ " <th>Country</th>\n",
208
+ " <th>year</th>\n",
209
+ " <th>Life Ladder</th>\n",
210
+ " <th>Log GDP per capita</th>\n",
211
+ " <th>Social support</th>\n",
212
+ " <th>Healthy life expectancy at birth</th>\n",
213
+ " <th>Freedom to make life choices</th>\n",
214
+ " <th>Generosity</th>\n",
215
+ " <th>Perceptions of corruption</th>\n",
216
+ " <th>Positive affect</th>\n",
217
+ " <th>Negative affect</th>\n",
218
+ " </tr>\n",
219
+ " </thead>\n",
220
+ " <tbody>\n",
221
+ " <tr>\n",
222
+ " <th>1948</th>\n",
223
+ " <td>Zimbabwe</td>\n",
224
+ " <td>2020</td>\n",
225
+ " <td>3.159802</td>\n",
226
+ " <td>7.828757</td>\n",
227
+ " <td>0.717243</td>\n",
228
+ " <td>56.799999</td>\n",
229
+ " <td>0.643303</td>\n",
230
+ " <td>-0.008696</td>\n",
231
+ " <td>0.788523</td>\n",
232
+ " <td>0.702573</td>\n",
233
+ " <td>0.345736</td>\n",
234
+ " </tr>\n",
235
+ " <tr>\n",
236
+ " <th>174</th>\n",
237
+ " <td>Benin</td>\n",
238
+ " <td>2020</td>\n",
239
+ " <td>4.407746</td>\n",
240
+ " <td>8.102292</td>\n",
241
+ " <td>0.506636</td>\n",
242
+ " <td>55.099998</td>\n",
243
+ " <td>0.783115</td>\n",
244
+ " <td>-0.083489</td>\n",
245
+ " <td>0.531884</td>\n",
246
+ " <td>0.608585</td>\n",
247
+ " <td>0.304512</td>\n",
248
+ " </tr>\n",
249
+ " <tr>\n",
250
+ " <th>1835</th>\n",
251
+ " <td>United Kingdom</td>\n",
252
+ " <td>2020</td>\n",
253
+ " <td>6.798177</td>\n",
254
+ " <td>10.625811</td>\n",
255
+ " <td>0.929353</td>\n",
256
+ " <td>72.699997</td>\n",
257
+ " <td>0.884624</td>\n",
258
+ " <td>0.202508</td>\n",
259
+ " <td>0.490204</td>\n",
260
+ " <td>0.758164</td>\n",
261
+ " <td>0.224655</td>\n",
262
+ " </tr>\n",
263
+ " <tr>\n",
264
+ " <th>1394</th>\n",
265
+ " <td>Philippines</td>\n",
266
+ " <td>2020</td>\n",
267
+ " <td>5.079585</td>\n",
268
+ " <td>9.061443</td>\n",
269
+ " <td>0.781140</td>\n",
270
+ " <td>62.099998</td>\n",
271
+ " <td>0.932042</td>\n",
272
+ " <td>-0.115543</td>\n",
273
+ " <td>0.744284</td>\n",
274
+ " <td>0.803562</td>\n",
275
+ " <td>0.326889</td>\n",
276
+ " </tr>\n",
277
+ " <tr>\n",
278
+ " <th>785</th>\n",
279
+ " <td>Iraq</td>\n",
280
+ " <td>2020</td>\n",
281
+ " <td>4.785165</td>\n",
282
+ " <td>9.167186</td>\n",
283
+ " <td>0.707847</td>\n",
284
+ " <td>61.400002</td>\n",
285
+ " <td>0.700215</td>\n",
286
+ " <td>-0.020748</td>\n",
287
+ " <td>0.849109</td>\n",
288
+ " <td>0.644464</td>\n",
289
+ " <td>0.531539</td>\n",
290
+ " </tr>\n",
291
+ " </tbody>\n",
292
+ "</table>\n",
293
+ "</div>"
294
+ ],
295
+ "text/plain": [
296
+ " Country year Life Ladder Log GDP per capita Social support \\\n",
297
+ "1948 Zimbabwe 2020 3.159802 7.828757 0.717243 \n",
298
+ "174 Benin 2020 4.407746 8.102292 0.506636 \n",
299
+ "1835 United Kingdom 2020 6.798177 10.625811 0.929353 \n",
300
+ "1394 Philippines 2020 5.079585 9.061443 0.781140 \n",
301
+ "785 Iraq 2020 4.785165 9.167186 0.707847 \n",
302
+ "\n",
303
+ " Healthy life expectancy at birth Freedom to make life choices \\\n",
304
+ "1948 56.799999 0.643303 \n",
305
+ "174 55.099998 0.783115 \n",
306
+ "1835 72.699997 0.884624 \n",
307
+ "1394 62.099998 0.932042 \n",
308
+ "785 61.400002 0.700215 \n",
309
+ "\n",
310
+ " Generosity Perceptions of corruption Positive affect Negative affect \n",
311
+ "1948 -0.008696 0.788523 0.702573 0.345736 \n",
312
+ "174 -0.083489 0.531884 0.608585 0.304512 \n",
313
+ "1835 0.202508 0.490204 0.758164 0.224655 \n",
314
+ "1394 -0.115543 0.744284 0.803562 0.326889 \n",
315
+ "785 -0.020748 0.849109 0.644464 0.531539 "
316
+ ]
317
+ },
318
+ "execution_count": 60,
319
+ "metadata": {},
320
+ "output_type": "execute_result"
321
+ }
322
+ ],
323
+ "source": [
324
+ "df_sorted.head()"
325
+ ]
326
+ },
327
+ {
328
+ "cell_type": "code",
329
+ "execution_count": 61,
330
+ "id": "abb8954c-106f-42d1-bf2a-0200b8927306",
331
+ "metadata": {},
332
+ "outputs": [],
333
+ "source": [
334
+ "df_dedup = df_sorted.drop_duplicates(subset=['Country'])"
335
+ ]
336
+ },
337
+ {
338
+ "cell_type": "code",
339
+ "execution_count": 62,
340
+ "id": "969f5fcf-5dc6-4ce3-93f7-0f35473f3c73",
341
+ "metadata": {},
342
+ "outputs": [
343
+ {
344
+ "data": {
345
+ "text/html": [
346
+ "<div>\n",
347
+ "<style scoped>\n",
348
+ " .dataframe tbody tr th:only-of-type {\n",
349
+ " vertical-align: middle;\n",
350
+ " }\n",
351
+ "\n",
352
+ " .dataframe tbody tr th {\n",
353
+ " vertical-align: top;\n",
354
+ " }\n",
355
+ "\n",
356
+ " .dataframe thead th {\n",
357
+ " text-align: right;\n",
358
+ " }\n",
359
+ "</style>\n",
360
+ "<table border=\"1\" class=\"dataframe\">\n",
361
+ " <thead>\n",
362
+ " <tr style=\"text-align: right;\">\n",
363
+ " <th></th>\n",
364
+ " <th>Country</th>\n",
365
+ " <th>year</th>\n",
366
+ " <th>Life Ladder</th>\n",
367
+ " <th>Log GDP per capita</th>\n",
368
+ " <th>Social support</th>\n",
369
+ " <th>Healthy life expectancy at birth</th>\n",
370
+ " <th>Freedom to make life choices</th>\n",
371
+ " <th>Generosity</th>\n",
372
+ " <th>Perceptions of corruption</th>\n",
373
+ " <th>Positive affect</th>\n",
374
+ " <th>Negative affect</th>\n",
375
+ " </tr>\n",
376
+ " </thead>\n",
377
+ " <tbody>\n",
378
+ " <tr>\n",
379
+ " <th>1948</th>\n",
380
+ " <td>Zimbabwe</td>\n",
381
+ " <td>2020</td>\n",
382
+ " <td>3.159802</td>\n",
383
+ " <td>7.828757</td>\n",
384
+ " <td>0.717243</td>\n",
385
+ " <td>56.799999</td>\n",
386
+ " <td>0.643303</td>\n",
387
+ " <td>-0.008696</td>\n",
388
+ " <td>0.788523</td>\n",
389
+ " <td>0.702573</td>\n",
390
+ " <td>0.345736</td>\n",
391
+ " </tr>\n",
392
+ " <tr>\n",
393
+ " <th>174</th>\n",
394
+ " <td>Benin</td>\n",
395
+ " <td>2020</td>\n",
396
+ " <td>4.407746</td>\n",
397
+ " <td>8.102292</td>\n",
398
+ " <td>0.506636</td>\n",
399
+ " <td>55.099998</td>\n",
400
+ " <td>0.783115</td>\n",
401
+ " <td>-0.083489</td>\n",
402
+ " <td>0.531884</td>\n",
403
+ " <td>0.608585</td>\n",
404
+ " <td>0.304512</td>\n",
405
+ " </tr>\n",
406
+ " <tr>\n",
407
+ " <th>1835</th>\n",
408
+ " <td>United Kingdom</td>\n",
409
+ " <td>2020</td>\n",
410
+ " <td>6.798177</td>\n",
411
+ " <td>10.625811</td>\n",
412
+ " <td>0.929353</td>\n",
413
+ " <td>72.699997</td>\n",
414
+ " <td>0.884624</td>\n",
415
+ " <td>0.202508</td>\n",
416
+ " <td>0.490204</td>\n",
417
+ " <td>0.758164</td>\n",
418
+ " <td>0.224655</td>\n",
419
+ " </tr>\n",
420
+ " <tr>\n",
421
+ " <th>1394</th>\n",
422
+ " <td>Philippines</td>\n",
423
+ " <td>2020</td>\n",
424
+ " <td>5.079585</td>\n",
425
+ " <td>9.061443</td>\n",
426
+ " <td>0.781140</td>\n",
427
+ " <td>62.099998</td>\n",
428
+ " <td>0.932042</td>\n",
429
+ " <td>-0.115543</td>\n",
430
+ " <td>0.744284</td>\n",
431
+ " <td>0.803562</td>\n",
432
+ " <td>0.326889</td>\n",
433
+ " </tr>\n",
434
+ " <tr>\n",
435
+ " <th>785</th>\n",
436
+ " <td>Iraq</td>\n",
437
+ " <td>2020</td>\n",
438
+ " <td>4.785165</td>\n",
439
+ " <td>9.167186</td>\n",
440
+ " <td>0.707847</td>\n",
441
+ " <td>61.400002</td>\n",
442
+ " <td>0.700215</td>\n",
443
+ " <td>-0.020748</td>\n",
444
+ " <td>0.849109</td>\n",
445
+ " <td>0.644464</td>\n",
446
+ " <td>0.531539</td>\n",
447
+ " </tr>\n",
448
+ " </tbody>\n",
449
+ "</table>\n",
450
+ "</div>"
451
+ ],
452
+ "text/plain": [
453
+ " Country year Life Ladder Log GDP per capita Social support \\\n",
454
+ "1948 Zimbabwe 2020 3.159802 7.828757 0.717243 \n",
455
+ "174 Benin 2020 4.407746 8.102292 0.506636 \n",
456
+ "1835 United Kingdom 2020 6.798177 10.625811 0.929353 \n",
457
+ "1394 Philippines 2020 5.079585 9.061443 0.781140 \n",
458
+ "785 Iraq 2020 4.785165 9.167186 0.707847 \n",
459
+ "\n",
460
+ " Healthy life expectancy at birth Freedom to make life choices \\\n",
461
+ "1948 56.799999 0.643303 \n",
462
+ "174 55.099998 0.783115 \n",
463
+ "1835 72.699997 0.884624 \n",
464
+ "1394 62.099998 0.932042 \n",
465
+ "785 61.400002 0.700215 \n",
466
+ "\n",
467
+ " Generosity Perceptions of corruption Positive affect Negative affect \n",
468
+ "1948 -0.008696 0.788523 0.702573 0.345736 \n",
469
+ "174 -0.083489 0.531884 0.608585 0.304512 \n",
470
+ "1835 0.202508 0.490204 0.758164 0.224655 \n",
471
+ "1394 -0.115543 0.744284 0.803562 0.326889 \n",
472
+ "785 -0.020748 0.849109 0.644464 0.531539 "
473
+ ]
474
+ },
475
+ "execution_count": 62,
476
+ "metadata": {},
477
+ "output_type": "execute_result"
478
+ }
479
+ ],
480
+ "source": [
481
+ "df_dedup.head()"
482
+ ]
483
+ },
484
+ {
485
+ "cell_type": "code",
486
+ "execution_count": 63,
487
+ "id": "d080546c-4698-4edd-8b76-e3c94aee9862",
488
+ "metadata": {},
489
+ "outputs": [
490
+ {
491
+ "data": {
492
+ "text/plain": [
493
+ "1949"
494
+ ]
495
+ },
496
+ "execution_count": 63,
497
+ "metadata": {},
498
+ "output_type": "execute_result"
499
+ }
500
+ ],
501
+ "source": [
502
+ "len(df_sorted)"
503
+ ]
504
+ },
505
+ {
506
+ "cell_type": "code",
507
+ "execution_count": 64,
508
+ "id": "6a817f5c-e871-4d69-9368-00a90efc6007",
509
+ "metadata": {},
510
+ "outputs": [
511
+ {
512
+ "data": {
513
+ "text/plain": [
514
+ "166"
515
+ ]
516
+ },
517
+ "execution_count": 64,
518
+ "metadata": {},
519
+ "output_type": "execute_result"
520
+ }
521
+ ],
522
+ "source": [
523
+ "len(df_dedup)"
524
+ ]
525
+ },
526
+ {
527
+ "cell_type": "code",
528
+ "execution_count": 65,
529
+ "id": "d6640a42-064e-4b31-b89d-de4f7d4240a3",
530
+ "metadata": {},
531
+ "outputs": [
532
+ {
533
+ "data": {
534
+ "text/html": [
535
+ "<div>\n",
536
+ "<style scoped>\n",
537
+ " .dataframe tbody tr th:only-of-type {\n",
538
+ " vertical-align: middle;\n",
539
+ " }\n",
540
+ "\n",
541
+ " .dataframe tbody tr th {\n",
542
+ " vertical-align: top;\n",
543
+ " }\n",
544
+ "\n",
545
+ " .dataframe thead th {\n",
546
+ " text-align: right;\n",
547
+ " }\n",
548
+ "</style>\n",
549
+ "<table border=\"1\" class=\"dataframe\">\n",
550
+ " <thead>\n",
551
+ " <tr style=\"text-align: right;\">\n",
552
+ " <th></th>\n",
553
+ " <th>Country</th>\n",
554
+ " <th>Continent</th>\n",
555
+ " </tr>\n",
556
+ " </thead>\n",
557
+ " <tbody>\n",
558
+ " <tr>\n",
559
+ " <th>0</th>\n",
560
+ " <td>Algeria</td>\n",
561
+ " <td>Africa</td>\n",
562
+ " </tr>\n",
563
+ " <tr>\n",
564
+ " <th>1</th>\n",
565
+ " <td>Angola</td>\n",
566
+ " <td>Africa</td>\n",
567
+ " </tr>\n",
568
+ " <tr>\n",
569
+ " <th>2</th>\n",
570
+ " <td>Benin</td>\n",
571
+ " <td>Africa</td>\n",
572
+ " </tr>\n",
573
+ " <tr>\n",
574
+ " <th>3</th>\n",
575
+ " <td>Botswana</td>\n",
576
+ " <td>Africa</td>\n",
577
+ " </tr>\n",
578
+ " <tr>\n",
579
+ " <th>4</th>\n",
580
+ " <td>Burkina</td>\n",
581
+ " <td>Africa</td>\n",
582
+ " </tr>\n",
583
+ " </tbody>\n",
584
+ "</table>\n",
585
+ "</div>"
586
+ ],
587
+ "text/plain": [
588
+ " Country Continent\n",
589
+ "0 Algeria Africa\n",
590
+ "1 Angola Africa\n",
591
+ "2 Benin Africa\n",
592
+ "3 Botswana Africa\n",
593
+ "4 Burkina Africa"
594
+ ]
595
+ },
596
+ "execution_count": 65,
597
+ "metadata": {},
598
+ "output_type": "execute_result"
599
+ }
600
+ ],
601
+ "source": [
602
+ "df_csv = pd.read_csv(\"Assets/Countries/countries.csv\")\n",
603
+ "df_csv.head()"
604
+ ]
605
+ },
606
+ {
607
+ "cell_type": "code",
608
+ "execution_count": 18,
609
+ "id": "a6e6f52e-cff7-4d78-b630-e71e07fa8842",
610
+ "metadata": {},
611
+ "outputs": [
612
+ {
613
+ "data": {
614
+ "text/plain": [
615
+ "194"
616
+ ]
617
+ },
618
+ "execution_count": 18,
619
+ "metadata": {},
620
+ "output_type": "execute_result"
621
+ }
622
+ ],
623
+ "source": [
624
+ "len(df_csv)"
625
+ ]
626
+ },
627
+ {
628
+ "cell_type": "code",
629
+ "execution_count": 66,
630
+ "id": "edaae740-75bf-42a2-afa6-ebbbbf50d792",
631
+ "metadata": {},
632
+ "outputs": [],
633
+ "source": [
634
+ "c1 = df_dedup[\"Country\"]\n",
635
+ "c2 = list(df_csv[\"Country\"])\n",
636
+ "c3 = [(country, country in c2) for country in c1]"
637
+ ]
638
+ },
639
+ {
640
+ "cell_type": "code",
641
+ "execution_count": 67,
642
+ "id": "5e86b02e-e5a3-4eaf-b045-74f0d0cfea08",
643
+ "metadata": {},
644
+ "outputs": [
645
+ {
646
+ "data": {
647
+ "text/plain": [
648
+ "True"
649
+ ]
650
+ },
651
+ "execution_count": 67,
652
+ "metadata": {},
653
+ "output_type": "execute_result"
654
+ }
655
+ ],
656
+ "source": [
657
+ "\"Zimbabwe\" in c2"
658
+ ]
659
+ },
660
+ {
661
+ "cell_type": "code",
662
+ "execution_count": 68,
663
+ "id": "921765a7-6f40-4d6a-9403-f5f8d8f26a65",
664
+ "metadata": {},
665
+ "outputs": [
666
+ {
667
+ "data": {
668
+ "text/plain": [
669
+ "[('Zimbabwe', True),\n",
670
+ " ('Benin', True),\n",
671
+ " ('United Kingdom', True),\n",
672
+ " ('Philippines', True),\n",
673
+ " ('Iraq', True),\n",
674
+ " ('Belgium', True),\n",
675
+ " ('Iran', True),\n",
676
+ " ('Poland', True),\n",
677
+ " ('Portugal', True),\n",
678
+ " ('India', True),\n",
679
+ " ('Israel', True),\n",
680
+ " ('Iceland', True),\n",
681
+ " ('United Arab Emirates', True),\n",
682
+ " ('Hungary', True),\n",
683
+ " ('Hong Kong S.A.R. of China', False),\n",
684
+ " ('Bolivia', True),\n",
685
+ " ('Russia', False),\n",
686
+ " ('Saudi Arabia', True),\n",
687
+ " ('Ireland', True),\n",
688
+ " ('Italy', True),\n",
689
+ " ('Ukraine', True),\n",
690
+ " ('Kenya', True),\n",
691
+ " ('Latvia', True),\n",
692
+ " ('Laos', True),\n",
693
+ " ('Nigeria', True),\n",
694
+ " ('Austria', True),\n",
695
+ " ('Kyrgyzstan', True),\n",
696
+ " ('North Macedonia', False),\n",
697
+ " ('Kosovo', False),\n",
698
+ " ('Norway', True),\n",
699
+ " ('United States', False),\n",
700
+ " ('Kazakhstan', True),\n",
701
+ " ('Bahrain', True),\n",
702
+ " ('Uruguay', True),\n",
703
+ " ('Jordan', True),\n",
704
+ " ('Japan', True),\n",
705
+ " ('Bangladesh', True),\n",
706
+ " ('Ivory Coast', True),\n",
707
+ " ('Bosnia and Herzegovina', True),\n",
708
+ " ('Greece', True),\n",
709
+ " ('Australia', True),\n",
710
+ " ('Croatia', True),\n",
711
+ " ('Tunisia', True),\n",
712
+ " ('Spain', True),\n",
713
+ " ('Denmark', True),\n",
714
+ " ('Cameroon', True),\n",
715
+ " ('Czech Republic', False),\n",
716
+ " ('Cyprus', True),\n",
717
+ " ('Sweden', True),\n",
718
+ " ('Canada', True),\n",
719
+ " ('South Korea', False),\n",
720
+ " ('Switzerland', True),\n",
721
+ " ('Thailand', True),\n",
722
+ " ('Taiwan Province of China', False),\n",
723
+ " ('Colombia', True),\n",
724
+ " ('Tajikistan', True),\n",
725
+ " ('Tanzania', True),\n",
726
+ " ('China', True),\n",
727
+ " ('Dominican Republic', True),\n",
728
+ " ('Cambodia', True),\n",
729
+ " ('Ghana', True),\n",
730
+ " ('Slovakia', True),\n",
731
+ " ('Serbia', True),\n",
732
+ " ('Uganda', True),\n",
733
+ " ('Germany', True),\n",
734
+ " ('Georgia', True),\n",
735
+ " ('Brazil', True),\n",
736
+ " ('France', True),\n",
737
+ " ('Bulgaria', True),\n",
738
+ " ('Finland', True),\n",
739
+ " ('Ecuador', True),\n",
740
+ " ('Ethiopia', True),\n",
741
+ " ('Slovenia', True),\n",
742
+ " ('Estonia', True),\n",
743
+ " ('El Salvador', True),\n",
744
+ " ('Turkey', True),\n",
745
+ " ('South Africa', True),\n",
746
+ " ('Egypt', True),\n",
747
+ " ('Venezuela', True),\n",
748
+ " ('Chile', True),\n",
749
+ " ('Lithuania', True),\n",
750
+ " ('Moldova', True),\n",
751
+ " ('Netherlands', True),\n",
752
+ " ('Mongolia', True),\n",
753
+ " ('Mauritius', True),\n",
754
+ " ('Mexico', True),\n",
755
+ " ('New Zealand', True),\n",
756
+ " ('Namibia', True),\n",
757
+ " ('Myanmar', False),\n",
758
+ " ('Malta', True),\n",
759
+ " ('Zambia', True),\n",
760
+ " ('Argentina', True),\n",
761
+ " ('Morocco', True),\n",
762
+ " ('Albania', True),\n",
763
+ " ('Montenegro', True),\n",
764
+ " ('Guinea', True),\n",
765
+ " ('Yemen', True),\n",
766
+ " ('Guatemala', True),\n",
767
+ " ('Malaysia', True),\n",
768
+ " ('Rwanda', True),\n",
769
+ " ('Sri Lanka', True),\n",
770
+ " ('Malawi', True),\n",
771
+ " ('Nepal', True),\n",
772
+ " ('Swaziland', True),\n",
773
+ " ('Romania', True),\n",
774
+ " ('Senegal', True),\n",
775
+ " ('Honduras', True),\n",
776
+ " ('Mali', True),\n",
777
+ " ('Mauritania', True),\n",
778
+ " ('Turkmenistan', True),\n",
779
+ " ('Burkina Faso', False),\n",
780
+ " ('Algeria', True),\n",
781
+ " ('Botswana', True),\n",
782
+ " ('Sierra Leone', True),\n",
783
+ " ('Mozambique', True),\n",
784
+ " ('Singapore', True),\n",
785
+ " ('Gambia', True),\n",
786
+ " ('Gabon', True),\n",
787
+ " ('Indonesia', True),\n",
788
+ " ('Azerbaijan', True),\n",
789
+ " ('Chad', True),\n",
790
+ " ('Liberia', True),\n",
791
+ " ('Libya', True),\n",
792
+ " ('Pakistan', True),\n",
793
+ " ('Armenia', True),\n",
794
+ " ('Comoros', True),\n",
795
+ " ('Afghanistan', True),\n",
796
+ " ('Palestinian Territories', False),\n",
797
+ " ('Nicaragua', True),\n",
798
+ " ('Niger', True),\n",
799
+ " ('Lebanon', True),\n",
800
+ " ('Lesotho', True),\n",
801
+ " ('Uzbekistan', True),\n",
802
+ " ('North Cyprus', False),\n",
803
+ " ('Kuwait', True),\n",
804
+ " ('Congo (Brazzaville)', False),\n",
805
+ " ('Peru', True),\n",
806
+ " ('Vietnam', True),\n",
807
+ " ('Togo', True),\n",
808
+ " ('Belarus', True),\n",
809
+ " ('Madagascar', True),\n",
810
+ " ('Costa Rica', True),\n",
811
+ " ('Luxembourg', True),\n",
812
+ " ('Panama', True),\n",
813
+ " ('Paraguay', True),\n",
814
+ " ('Jamaica', True),\n",
815
+ " ('Maldives', True),\n",
816
+ " ('Haiti', True),\n",
817
+ " ('Burundi', True),\n",
818
+ " ('Congo (Kinshasa)', False),\n",
819
+ " ('Central African Republic', True),\n",
820
+ " ('Trinidad and Tobago', True),\n",
821
+ " ('South Sudan', True),\n",
822
+ " ('Somalia', True),\n",
823
+ " ('Syria', True),\n",
824
+ " ('Qatar', True),\n",
825
+ " ('Bhutan', True),\n",
826
+ " ('Sudan', True),\n",
827
+ " ('Angola', True),\n",
828
+ " ('Belize', True),\n",
829
+ " ('Suriname', True),\n",
830
+ " ('Somaliland region', False),\n",
831
+ " ('Oman', True),\n",
832
+ " ('Djibouti', True),\n",
833
+ " ('Guyana', True),\n",
834
+ " ('Cuba', True)]"
835
+ ]
836
+ },
837
+ "execution_count": 68,
838
+ "metadata": {},
839
+ "output_type": "execute_result"
840
+ }
841
+ ],
842
+ "source": [
843
+ "c3"
844
+ ]
845
+ },
846
+ {
847
+ "cell_type": "code",
848
+ "execution_count": 37,
849
+ "id": "ff74b057-7281-4ab2-82c5-367e949fbbed",
850
+ "metadata": {},
851
+ "outputs": [
852
+ {
853
+ "data": {
854
+ "text/plain": [
855
+ "['Hong Kong S.A.R. of China',\n",
856
+ " 'Russia',\n",
857
+ " 'North Macedonia',\n",
858
+ " 'Kosovo',\n",
859
+ " 'United States',\n",
860
+ " 'Czech Republic',\n",
861
+ " 'South Korea',\n",
862
+ " 'Taiwan Province of China',\n",
863
+ " 'Myanmar',\n",
864
+ " 'Burkina Faso',\n",
865
+ " 'Palestinian Territories',\n",
866
+ " 'North Cyprus',\n",
867
+ " 'Congo (Brazzaville)',\n",
868
+ " 'Congo (Kinshasa)',\n",
869
+ " 'Somaliland region']"
870
+ ]
871
+ },
872
+ "execution_count": 37,
873
+ "metadata": {},
874
+ "output_type": "execute_result"
875
+ }
876
+ ],
877
+ "source": [
878
+ "num = 0\n",
879
+ "missing = []\n",
880
+ "for pair in c3:\n",
881
+ " if pair[1]:\n",
882
+ " num +=1\n",
883
+ " else:\n",
884
+ " missing.append(pair[0]) \n",
885
+ "num\n",
886
+ "missing"
887
+ ]
888
+ },
889
+ {
890
+ "cell_type": "code",
891
+ "execution_count": 44,
892
+ "id": "50f20260-3ed6-4f4e-a558-e3c6374ecb26",
893
+ "metadata": {},
894
+ "outputs": [
895
+ {
896
+ "data": {
897
+ "text/plain": [
898
+ "'Africa'"
899
+ ]
900
+ },
901
+ "execution_count": 44,
902
+ "metadata": {},
903
+ "output_type": "execute_result"
904
+ }
905
+ ],
906
+ "source": [
907
+ "df_csv.loc[df_csv['Country'] == \"Madagascar\", 'Continent'].iloc[0]"
908
+ ]
909
+ },
910
+ {
911
+ "cell_type": "code",
912
+ "execution_count": 50,
913
+ "id": "9dfa66ef-1c2b-4893-8993-107c2e02a2c8",
914
+ "metadata": {},
915
+ "outputs": [
916
+ {
917
+ "data": {
918
+ "text/html": [
919
+ "<div>\n",
920
+ "<style scoped>\n",
921
+ " .dataframe tbody tr th:only-of-type {\n",
922
+ " vertical-align: middle;\n",
923
+ " }\n",
924
+ "\n",
925
+ " .dataframe tbody tr th {\n",
926
+ " vertical-align: top;\n",
927
+ " }\n",
928
+ "\n",
929
+ " .dataframe thead th {\n",
930
+ " text-align: right;\n",
931
+ " }\n",
932
+ "</style>\n",
933
+ "<table border=\"1\" class=\"dataframe\">\n",
934
+ " <thead>\n",
935
+ " <tr style=\"text-align: right;\">\n",
936
+ " <th></th>\n",
937
+ " <th>Country name</th>\n",
938
+ " <th>year</th>\n",
939
+ " <th>Life Ladder</th>\n",
940
+ " <th>Log GDP per capita</th>\n",
941
+ " <th>Social support</th>\n",
942
+ " <th>Healthy life expectancy at birth</th>\n",
943
+ " <th>Freedom to make life choices</th>\n",
944
+ " <th>Generosity</th>\n",
945
+ " <th>Perceptions of corruption</th>\n",
946
+ " <th>Positive affect</th>\n",
947
+ " <th>Negative affect</th>\n",
948
+ " <th>Continent</th>\n",
949
+ " </tr>\n",
950
+ " </thead>\n",
951
+ " <tbody>\n",
952
+ " <tr>\n",
953
+ " <th>1948</th>\n",
954
+ " <td>Zimbabwe</td>\n",
955
+ " <td>2020</td>\n",
956
+ " <td>3.159802</td>\n",
957
+ " <td>7.828757</td>\n",
958
+ " <td>0.717243</td>\n",
959
+ " <td>56.799999</td>\n",
960
+ " <td>0.643303</td>\n",
961
+ " <td>-0.008696</td>\n",
962
+ " <td>0.788523</td>\n",
963
+ " <td>0.702573</td>\n",
964
+ " <td>0.345736</td>\n",
965
+ " <td>&lt;pandas.core.indexing._iLocIndexer object at 0...</td>\n",
966
+ " </tr>\n",
967
+ " <tr>\n",
968
+ " <th>174</th>\n",
969
+ " <td>Benin</td>\n",
970
+ " <td>2020</td>\n",
971
+ " <td>4.407746</td>\n",
972
+ " <td>8.102292</td>\n",
973
+ " <td>0.506636</td>\n",
974
+ " <td>55.099998</td>\n",
975
+ " <td>0.783115</td>\n",
976
+ " <td>-0.083489</td>\n",
977
+ " <td>0.531884</td>\n",
978
+ " <td>0.608585</td>\n",
979
+ " <td>0.304512</td>\n",
980
+ " <td>&lt;pandas.core.indexing._iLocIndexer object at 0...</td>\n",
981
+ " </tr>\n",
982
+ " <tr>\n",
983
+ " <th>1835</th>\n",
984
+ " <td>United Kingdom</td>\n",
985
+ " <td>2020</td>\n",
986
+ " <td>6.798177</td>\n",
987
+ " <td>10.625811</td>\n",
988
+ " <td>0.929353</td>\n",
989
+ " <td>72.699997</td>\n",
990
+ " <td>0.884624</td>\n",
991
+ " <td>0.202508</td>\n",
992
+ " <td>0.490204</td>\n",
993
+ " <td>0.758164</td>\n",
994
+ " <td>0.224655</td>\n",
995
+ " <td>&lt;pandas.core.indexing._iLocIndexer object at 0...</td>\n",
996
+ " </tr>\n",
997
+ " <tr>\n",
998
+ " <th>1394</th>\n",
999
+ " <td>Philippines</td>\n",
1000
+ " <td>2020</td>\n",
1001
+ " <td>5.079585</td>\n",
1002
+ " <td>9.061443</td>\n",
1003
+ " <td>0.781140</td>\n",
1004
+ " <td>62.099998</td>\n",
1005
+ " <td>0.932042</td>\n",
1006
+ " <td>-0.115543</td>\n",
1007
+ " <td>0.744284</td>\n",
1008
+ " <td>0.803562</td>\n",
1009
+ " <td>0.326889</td>\n",
1010
+ " <td>&lt;pandas.core.indexing._iLocIndexer object at 0...</td>\n",
1011
+ " </tr>\n",
1012
+ " <tr>\n",
1013
+ " <th>785</th>\n",
1014
+ " <td>Iraq</td>\n",
1015
+ " <td>2020</td>\n",
1016
+ " <td>4.785165</td>\n",
1017
+ " <td>9.167186</td>\n",
1018
+ " <td>0.707847</td>\n",
1019
+ " <td>61.400002</td>\n",
1020
+ " <td>0.700215</td>\n",
1021
+ " <td>-0.020748</td>\n",
1022
+ " <td>0.849109</td>\n",
1023
+ " <td>0.644464</td>\n",
1024
+ " <td>0.531539</td>\n",
1025
+ " <td>&lt;pandas.core.indexing._iLocIndexer object at 0...</td>\n",
1026
+ " </tr>\n",
1027
+ " </tbody>\n",
1028
+ "</table>\n",
1029
+ "</div>"
1030
+ ],
1031
+ "text/plain": [
1032
+ " Country name year Life Ladder Log GDP per capita Social support \\\n",
1033
+ "1948 Zimbabwe 2020 3.159802 7.828757 0.717243 \n",
1034
+ "174 Benin 2020 4.407746 8.102292 0.506636 \n",
1035
+ "1835 United Kingdom 2020 6.798177 10.625811 0.929353 \n",
1036
+ "1394 Philippines 2020 5.079585 9.061443 0.781140 \n",
1037
+ "785 Iraq 2020 4.785165 9.167186 0.707847 \n",
1038
+ "\n",
1039
+ " Healthy life expectancy at birth Freedom to make life choices \\\n",
1040
+ "1948 56.799999 0.643303 \n",
1041
+ "174 55.099998 0.783115 \n",
1042
+ "1835 72.699997 0.884624 \n",
1043
+ "1394 62.099998 0.932042 \n",
1044
+ "785 61.400002 0.700215 \n",
1045
+ "\n",
1046
+ " Generosity Perceptions of corruption Positive affect Negative affect \\\n",
1047
+ "1948 -0.008696 0.788523 0.702573 0.345736 \n",
1048
+ "174 -0.083489 0.531884 0.608585 0.304512 \n",
1049
+ "1835 0.202508 0.490204 0.758164 0.224655 \n",
1050
+ "1394 -0.115543 0.744284 0.803562 0.326889 \n",
1051
+ "785 -0.020748 0.849109 0.644464 0.531539 \n",
1052
+ "\n",
1053
+ " Continent \n",
1054
+ "1948 <pandas.core.indexing._iLocIndexer object at 0... \n",
1055
+ "174 <pandas.core.indexing._iLocIndexer object at 0... \n",
1056
+ "1835 <pandas.core.indexing._iLocIndexer object at 0... \n",
1057
+ "1394 <pandas.core.indexing._iLocIndexer object at 0... \n",
1058
+ "785 <pandas.core.indexing._iLocIndexer object at 0... "
1059
+ ]
1060
+ },
1061
+ "execution_count": 50,
1062
+ "metadata": {},
1063
+ "output_type": "execute_result"
1064
+ }
1065
+ ],
1066
+ "source": [
1067
+ "df_dedup.head()"
1068
+ ]
1069
+ },
1070
+ {
1071
+ "cell_type": "code",
1072
+ "execution_count": 74,
1073
+ "id": "b1fcd392-abfb-42a8-8485-f3fbd6a155d1",
1074
+ "metadata": {},
1075
+ "outputs": [],
1076
+ "source": [
1077
+ "df_cont = df_dedup.set_index('Country').join(df_csv.set_index('Country'), on='Country', how='left')"
1078
+ ]
1079
+ },
1080
+ {
1081
+ "cell_type": "code",
1082
+ "execution_count": 77,
1083
+ "id": "55ec121c-534e-4e25-88e9-5ab8267fd66b",
1084
+ "metadata": {},
1085
+ "outputs": [],
1086
+ "source": [
1087
+ "df_cont = df_cont.reset_index()"
1088
+ ]
1089
+ },
1090
+ {
1091
+ "cell_type": "code",
1092
+ "execution_count": 78,
1093
+ "id": "8ddaf798-772d-489d-b2fc-32d4cd76ae50",
1094
+ "metadata": {},
1095
+ "outputs": [
1096
+ {
1097
+ "data": {
1098
+ "text/plain": [
1099
+ "166"
1100
+ ]
1101
+ },
1102
+ "execution_count": 78,
1103
+ "metadata": {},
1104
+ "output_type": "execute_result"
1105
+ }
1106
+ ],
1107
+ "source": [
1108
+ "len(df_cont)"
1109
+ ]
1110
+ },
1111
+ {
1112
+ "cell_type": "code",
1113
+ "execution_count": 79,
1114
+ "id": "7420265a-e079-443c-9be0-01becf73a836",
1115
+ "metadata": {},
1116
+ "outputs": [
1117
+ {
1118
+ "data": {
1119
+ "text/html": [
1120
+ "<div>\n",
1121
+ "<style scoped>\n",
1122
+ " .dataframe tbody tr th:only-of-type {\n",
1123
+ " vertical-align: middle;\n",
1124
+ " }\n",
1125
+ "\n",
1126
+ " .dataframe tbody tr th {\n",
1127
+ " vertical-align: top;\n",
1128
+ " }\n",
1129
+ "\n",
1130
+ " .dataframe thead th {\n",
1131
+ " text-align: right;\n",
1132
+ " }\n",
1133
+ "</style>\n",
1134
+ "<table border=\"1\" class=\"dataframe\">\n",
1135
+ " <thead>\n",
1136
+ " <tr style=\"text-align: right;\">\n",
1137
+ " <th></th>\n",
1138
+ " <th>Country</th>\n",
1139
+ " <th>year</th>\n",
1140
+ " <th>Life Ladder</th>\n",
1141
+ " <th>Log GDP per capita</th>\n",
1142
+ " <th>Social support</th>\n",
1143
+ " <th>Healthy life expectancy at birth</th>\n",
1144
+ " <th>Freedom to make life choices</th>\n",
1145
+ " <th>Generosity</th>\n",
1146
+ " <th>Perceptions of corruption</th>\n",
1147
+ " <th>Positive affect</th>\n",
1148
+ " <th>Negative affect</th>\n",
1149
+ " <th>Continent</th>\n",
1150
+ " </tr>\n",
1151
+ " </thead>\n",
1152
+ " <tbody>\n",
1153
+ " <tr>\n",
1154
+ " <th>0</th>\n",
1155
+ " <td>Zimbabwe</td>\n",
1156
+ " <td>2020</td>\n",
1157
+ " <td>3.159802</td>\n",
1158
+ " <td>7.828757</td>\n",
1159
+ " <td>0.717243</td>\n",
1160
+ " <td>56.799999</td>\n",
1161
+ " <td>0.643303</td>\n",
1162
+ " <td>-0.008696</td>\n",
1163
+ " <td>0.788523</td>\n",
1164
+ " <td>0.702573</td>\n",
1165
+ " <td>0.345736</td>\n",
1166
+ " <td>Africa</td>\n",
1167
+ " </tr>\n",
1168
+ " <tr>\n",
1169
+ " <th>1</th>\n",
1170
+ " <td>Benin</td>\n",
1171
+ " <td>2020</td>\n",
1172
+ " <td>4.407746</td>\n",
1173
+ " <td>8.102292</td>\n",
1174
+ " <td>0.506636</td>\n",
1175
+ " <td>55.099998</td>\n",
1176
+ " <td>0.783115</td>\n",
1177
+ " <td>-0.083489</td>\n",
1178
+ " <td>0.531884</td>\n",
1179
+ " <td>0.608585</td>\n",
1180
+ " <td>0.304512</td>\n",
1181
+ " <td>Africa</td>\n",
1182
+ " </tr>\n",
1183
+ " <tr>\n",
1184
+ " <th>2</th>\n",
1185
+ " <td>United Kingdom</td>\n",
1186
+ " <td>2020</td>\n",
1187
+ " <td>6.798177</td>\n",
1188
+ " <td>10.625811</td>\n",
1189
+ " <td>0.929353</td>\n",
1190
+ " <td>72.699997</td>\n",
1191
+ " <td>0.884624</td>\n",
1192
+ " <td>0.202508</td>\n",
1193
+ " <td>0.490204</td>\n",
1194
+ " <td>0.758164</td>\n",
1195
+ " <td>0.224655</td>\n",
1196
+ " <td>Europe</td>\n",
1197
+ " </tr>\n",
1198
+ " <tr>\n",
1199
+ " <th>3</th>\n",
1200
+ " <td>Philippines</td>\n",
1201
+ " <td>2020</td>\n",
1202
+ " <td>5.079585</td>\n",
1203
+ " <td>9.061443</td>\n",
1204
+ " <td>0.781140</td>\n",
1205
+ " <td>62.099998</td>\n",
1206
+ " <td>0.932042</td>\n",
1207
+ " <td>-0.115543</td>\n",
1208
+ " <td>0.744284</td>\n",
1209
+ " <td>0.803562</td>\n",
1210
+ " <td>0.326889</td>\n",
1211
+ " <td>Asia</td>\n",
1212
+ " </tr>\n",
1213
+ " <tr>\n",
1214
+ " <th>4</th>\n",
1215
+ " <td>Iraq</td>\n",
1216
+ " <td>2020</td>\n",
1217
+ " <td>4.785165</td>\n",
1218
+ " <td>9.167186</td>\n",
1219
+ " <td>0.707847</td>\n",
1220
+ " <td>61.400002</td>\n",
1221
+ " <td>0.700215</td>\n",
1222
+ " <td>-0.020748</td>\n",
1223
+ " <td>0.849109</td>\n",
1224
+ " <td>0.644464</td>\n",
1225
+ " <td>0.531539</td>\n",
1226
+ " <td>Asia</td>\n",
1227
+ " </tr>\n",
1228
+ " </tbody>\n",
1229
+ "</table>\n",
1230
+ "</div>"
1231
+ ],
1232
+ "text/plain": [
1233
+ " Country year Life Ladder Log GDP per capita Social support \\\n",
1234
+ "0 Zimbabwe 2020 3.159802 7.828757 0.717243 \n",
1235
+ "1 Benin 2020 4.407746 8.102292 0.506636 \n",
1236
+ "2 United Kingdom 2020 6.798177 10.625811 0.929353 \n",
1237
+ "3 Philippines 2020 5.079585 9.061443 0.781140 \n",
1238
+ "4 Iraq 2020 4.785165 9.167186 0.707847 \n",
1239
+ "\n",
1240
+ " Healthy life expectancy at birth Freedom to make life choices Generosity \\\n",
1241
+ "0 56.799999 0.643303 -0.008696 \n",
1242
+ "1 55.099998 0.783115 -0.083489 \n",
1243
+ "2 72.699997 0.884624 0.202508 \n",
1244
+ "3 62.099998 0.932042 -0.115543 \n",
1245
+ "4 61.400002 0.700215 -0.020748 \n",
1246
+ "\n",
1247
+ " Perceptions of corruption Positive affect Negative affect Continent \n",
1248
+ "0 0.788523 0.702573 0.345736 Africa \n",
1249
+ "1 0.531884 0.608585 0.304512 Africa \n",
1250
+ "2 0.490204 0.758164 0.224655 Europe \n",
1251
+ "3 0.744284 0.803562 0.326889 Asia \n",
1252
+ "4 0.849109 0.644464 0.531539 Asia "
1253
+ ]
1254
+ },
1255
+ "execution_count": 79,
1256
+ "metadata": {},
1257
+ "output_type": "execute_result"
1258
+ }
1259
+ ],
1260
+ "source": [
1261
+ "df_cont.head()"
1262
+ ]
1263
+ },
1264
+ {
1265
+ "cell_type": "code",
1266
+ "execution_count": 81,
1267
+ "id": "fb26fc2f-f591-4e66-9357-0928c2c46e89",
1268
+ "metadata": {},
1269
+ "outputs": [],
1270
+ "source": [
1271
+ "# I updated the name of the output so that I don't accidentally overwrite the manual work I did at the end to add in the last few outliers.\n",
1272
+ "#df_cont.to_csv(\"Assets/Countries/base-combined-countries.csv\")"
1273
+ ]
1274
+ },
1275
+ {
1276
+ "cell_type": "code",
1277
+ "execution_count": 83,
1278
+ "id": "445a79b2-0023-4812-b606-1ff9cb7720e7",
1279
+ "metadata": {},
1280
+ "outputs": [],
1281
+ "source": [
1282
+ "df3 = df_csv.set_index('Country').join(df_dedup.set_index('Country'), on='Country', how='left')"
1283
+ ]
1284
+ },
1285
+ {
1286
+ "cell_type": "code",
1287
+ "execution_count": 87,
1288
+ "id": "59c3d6bb-11ea-4b4f-9a9e-d9b58561e8f2",
1289
+ "metadata": {},
1290
+ "outputs": [],
1291
+ "source": [
1292
+ "df3 = df3[df3.year.isnull()]"
1293
+ ]
1294
+ },
1295
+ {
1296
+ "cell_type": "code",
1297
+ "execution_count": 88,
1298
+ "id": "3b76dce1-a02f-4b09-bc44-b0e28271bc56",
1299
+ "metadata": {},
1300
+ "outputs": [
1301
+ {
1302
+ "data": {
1303
+ "text/html": [
1304
+ "<div>\n",
1305
+ "<style scoped>\n",
1306
+ " .dataframe tbody tr th:only-of-type {\n",
1307
+ " vertical-align: middle;\n",
1308
+ " }\n",
1309
+ "\n",
1310
+ " .dataframe tbody tr th {\n",
1311
+ " vertical-align: top;\n",
1312
+ " }\n",
1313
+ "\n",
1314
+ " .dataframe thead th {\n",
1315
+ " text-align: right;\n",
1316
+ " }\n",
1317
+ "</style>\n",
1318
+ "<table border=\"1\" class=\"dataframe\">\n",
1319
+ " <thead>\n",
1320
+ " <tr style=\"text-align: right;\">\n",
1321
+ " <th></th>\n",
1322
+ " <th>Continent</th>\n",
1323
+ " <th>year</th>\n",
1324
+ " <th>Life Ladder</th>\n",
1325
+ " <th>Log GDP per capita</th>\n",
1326
+ " <th>Social support</th>\n",
1327
+ " <th>Healthy life expectancy at birth</th>\n",
1328
+ " <th>Freedom to make life choices</th>\n",
1329
+ " <th>Generosity</th>\n",
1330
+ " <th>Perceptions of corruption</th>\n",
1331
+ " <th>Positive affect</th>\n",
1332
+ " <th>Negative affect</th>\n",
1333
+ " </tr>\n",
1334
+ " <tr>\n",
1335
+ " <th>Country</th>\n",
1336
+ " <th></th>\n",
1337
+ " <th></th>\n",
1338
+ " <th></th>\n",
1339
+ " <th></th>\n",
1340
+ " <th></th>\n",
1341
+ " <th></th>\n",
1342
+ " <th></th>\n",
1343
+ " <th></th>\n",
1344
+ " <th></th>\n",
1345
+ " <th></th>\n",
1346
+ " <th></th>\n",
1347
+ " </tr>\n",
1348
+ " </thead>\n",
1349
+ " <tbody>\n",
1350
+ " <tr>\n",
1351
+ " <th>Burkina</th>\n",
1352
+ " <td>Africa</td>\n",
1353
+ " <td>NaN</td>\n",
1354
+ " <td>NaN</td>\n",
1355
+ " <td>NaN</td>\n",
1356
+ " <td>NaN</td>\n",
1357
+ " <td>NaN</td>\n",
1358
+ " <td>NaN</td>\n",
1359
+ " <td>NaN</td>\n",
1360
+ " <td>NaN</td>\n",
1361
+ " <td>NaN</td>\n",
1362
+ " <td>NaN</td>\n",
1363
+ " </tr>\n",
1364
+ " <tr>\n",
1365
+ " <th>Cape Verde</th>\n",
1366
+ " <td>Africa</td>\n",
1367
+ " <td>NaN</td>\n",
1368
+ " <td>NaN</td>\n",
1369
+ " <td>NaN</td>\n",
1370
+ " <td>NaN</td>\n",
1371
+ " <td>NaN</td>\n",
1372
+ " <td>NaN</td>\n",
1373
+ " <td>NaN</td>\n",
1374
+ " <td>NaN</td>\n",
1375
+ " <td>NaN</td>\n",
1376
+ " <td>NaN</td>\n",
1377
+ " </tr>\n",
1378
+ " <tr>\n",
1379
+ " <th>Congo</th>\n",
1380
+ " <td>Africa</td>\n",
1381
+ " <td>NaN</td>\n",
1382
+ " <td>NaN</td>\n",
1383
+ " <td>NaN</td>\n",
1384
+ " <td>NaN</td>\n",
1385
+ " <td>NaN</td>\n",
1386
+ " <td>NaN</td>\n",
1387
+ " <td>NaN</td>\n",
1388
+ " <td>NaN</td>\n",
1389
+ " <td>NaN</td>\n",
1390
+ " <td>NaN</td>\n",
1391
+ " </tr>\n",
1392
+ " <tr>\n",
1393
+ " <th>Congo, Democratic Republic of</th>\n",
1394
+ " <td>Africa</td>\n",
1395
+ " <td>NaN</td>\n",
1396
+ " <td>NaN</td>\n",
1397
+ " <td>NaN</td>\n",
1398
+ " <td>NaN</td>\n",
1399
+ " <td>NaN</td>\n",
1400
+ " <td>NaN</td>\n",
1401
+ " <td>NaN</td>\n",
1402
+ " <td>NaN</td>\n",
1403
+ " <td>NaN</td>\n",
1404
+ " <td>NaN</td>\n",
1405
+ " </tr>\n",
1406
+ " <tr>\n",
1407
+ " <th>Equatorial Guinea</th>\n",
1408
+ " <td>Africa</td>\n",
1409
+ " <td>NaN</td>\n",
1410
+ " <td>NaN</td>\n",
1411
+ " <td>NaN</td>\n",
1412
+ " <td>NaN</td>\n",
1413
+ " <td>NaN</td>\n",
1414
+ " <td>NaN</td>\n",
1415
+ " <td>NaN</td>\n",
1416
+ " <td>NaN</td>\n",
1417
+ " <td>NaN</td>\n",
1418
+ " <td>NaN</td>\n",
1419
+ " </tr>\n",
1420
+ " <tr>\n",
1421
+ " <th>Eritrea</th>\n",
1422
+ " <td>Africa</td>\n",
1423
+ " <td>NaN</td>\n",
1424
+ " <td>NaN</td>\n",
1425
+ " <td>NaN</td>\n",
1426
+ " <td>NaN</td>\n",
1427
+ " <td>NaN</td>\n",
1428
+ " <td>NaN</td>\n",
1429
+ " <td>NaN</td>\n",
1430
+ " <td>NaN</td>\n",
1431
+ " <td>NaN</td>\n",
1432
+ " <td>NaN</td>\n",
1433
+ " </tr>\n",
1434
+ " <tr>\n",
1435
+ " <th>Guinea-Bissau</th>\n",
1436
+ " <td>Africa</td>\n",
1437
+ " <td>NaN</td>\n",
1438
+ " <td>NaN</td>\n",
1439
+ " <td>NaN</td>\n",
1440
+ " <td>NaN</td>\n",
1441
+ " <td>NaN</td>\n",
1442
+ " <td>NaN</td>\n",
1443
+ " <td>NaN</td>\n",
1444
+ " <td>NaN</td>\n",
1445
+ " <td>NaN</td>\n",
1446
+ " <td>NaN</td>\n",
1447
+ " </tr>\n",
1448
+ " <tr>\n",
1449
+ " <th>Sao Tome and Principe</th>\n",
1450
+ " <td>Africa</td>\n",
1451
+ " <td>NaN</td>\n",
1452
+ " <td>NaN</td>\n",
1453
+ " <td>NaN</td>\n",
1454
+ " <td>NaN</td>\n",
1455
+ " <td>NaN</td>\n",
1456
+ " <td>NaN</td>\n",
1457
+ " <td>NaN</td>\n",
1458
+ " <td>NaN</td>\n",
1459
+ " <td>NaN</td>\n",
1460
+ " <td>NaN</td>\n",
1461
+ " </tr>\n",
1462
+ " <tr>\n",
1463
+ " <th>Seychelles</th>\n",
1464
+ " <td>Africa</td>\n",
1465
+ " <td>NaN</td>\n",
1466
+ " <td>NaN</td>\n",
1467
+ " <td>NaN</td>\n",
1468
+ " <td>NaN</td>\n",
1469
+ " <td>NaN</td>\n",
1470
+ " <td>NaN</td>\n",
1471
+ " <td>NaN</td>\n",
1472
+ " <td>NaN</td>\n",
1473
+ " <td>NaN</td>\n",
1474
+ " <td>NaN</td>\n",
1475
+ " </tr>\n",
1476
+ " <tr>\n",
1477
+ " <th>Brunei</th>\n",
1478
+ " <td>Asia</td>\n",
1479
+ " <td>NaN</td>\n",
1480
+ " <td>NaN</td>\n",
1481
+ " <td>NaN</td>\n",
1482
+ " <td>NaN</td>\n",
1483
+ " <td>NaN</td>\n",
1484
+ " <td>NaN</td>\n",
1485
+ " <td>NaN</td>\n",
1486
+ " <td>NaN</td>\n",
1487
+ " <td>NaN</td>\n",
1488
+ " <td>NaN</td>\n",
1489
+ " </tr>\n",
1490
+ " <tr>\n",
1491
+ " <th>Burma (Myanmar)</th>\n",
1492
+ " <td>Asia</td>\n",
1493
+ " <td>NaN</td>\n",
1494
+ " <td>NaN</td>\n",
1495
+ " <td>NaN</td>\n",
1496
+ " <td>NaN</td>\n",
1497
+ " <td>NaN</td>\n",
1498
+ " <td>NaN</td>\n",
1499
+ " <td>NaN</td>\n",
1500
+ " <td>NaN</td>\n",
1501
+ " <td>NaN</td>\n",
1502
+ " <td>NaN</td>\n",
1503
+ " </tr>\n",
1504
+ " <tr>\n",
1505
+ " <th>East Timor</th>\n",
1506
+ " <td>Asia</td>\n",
1507
+ " <td>NaN</td>\n",
1508
+ " <td>NaN</td>\n",
1509
+ " <td>NaN</td>\n",
1510
+ " <td>NaN</td>\n",
1511
+ " <td>NaN</td>\n",
1512
+ " <td>NaN</td>\n",
1513
+ " <td>NaN</td>\n",
1514
+ " <td>NaN</td>\n",
1515
+ " <td>NaN</td>\n",
1516
+ " <td>NaN</td>\n",
1517
+ " </tr>\n",
1518
+ " <tr>\n",
1519
+ " <th>Korea, North</th>\n",
1520
+ " <td>Asia</td>\n",
1521
+ " <td>NaN</td>\n",
1522
+ " <td>NaN</td>\n",
1523
+ " <td>NaN</td>\n",
1524
+ " <td>NaN</td>\n",
1525
+ " <td>NaN</td>\n",
1526
+ " <td>NaN</td>\n",
1527
+ " <td>NaN</td>\n",
1528
+ " <td>NaN</td>\n",
1529
+ " <td>NaN</td>\n",
1530
+ " <td>NaN</td>\n",
1531
+ " </tr>\n",
1532
+ " <tr>\n",
1533
+ " <th>Korea, South</th>\n",
1534
+ " <td>Asia</td>\n",
1535
+ " <td>NaN</td>\n",
1536
+ " <td>NaN</td>\n",
1537
+ " <td>NaN</td>\n",
1538
+ " <td>NaN</td>\n",
1539
+ " <td>NaN</td>\n",
1540
+ " <td>NaN</td>\n",
1541
+ " <td>NaN</td>\n",
1542
+ " <td>NaN</td>\n",
1543
+ " <td>NaN</td>\n",
1544
+ " <td>NaN</td>\n",
1545
+ " </tr>\n",
1546
+ " <tr>\n",
1547
+ " <th>Russian Federation</th>\n",
1548
+ " <td>Asia</td>\n",
1549
+ " <td>NaN</td>\n",
1550
+ " <td>NaN</td>\n",
1551
+ " <td>NaN</td>\n",
1552
+ " <td>NaN</td>\n",
1553
+ " <td>NaN</td>\n",
1554
+ " <td>NaN</td>\n",
1555
+ " <td>NaN</td>\n",
1556
+ " <td>NaN</td>\n",
1557
+ " <td>NaN</td>\n",
1558
+ " <td>NaN</td>\n",
1559
+ " </tr>\n",
1560
+ " <tr>\n",
1561
+ " <th>Andorra</th>\n",
1562
+ " <td>Europe</td>\n",
1563
+ " <td>NaN</td>\n",
1564
+ " <td>NaN</td>\n",
1565
+ " <td>NaN</td>\n",
1566
+ " <td>NaN</td>\n",
1567
+ " <td>NaN</td>\n",
1568
+ " <td>NaN</td>\n",
1569
+ " <td>NaN</td>\n",
1570
+ " <td>NaN</td>\n",
1571
+ " <td>NaN</td>\n",
1572
+ " <td>NaN</td>\n",
1573
+ " </tr>\n",
1574
+ " <tr>\n",
1575
+ " <th>CZ</th>\n",
1576
+ " <td>Europe</td>\n",
1577
+ " <td>NaN</td>\n",
1578
+ " <td>NaN</td>\n",
1579
+ " <td>NaN</td>\n",
1580
+ " <td>NaN</td>\n",
1581
+ " <td>NaN</td>\n",
1582
+ " <td>NaN</td>\n",
1583
+ " <td>NaN</td>\n",
1584
+ " <td>NaN</td>\n",
1585
+ " <td>NaN</td>\n",
1586
+ " <td>NaN</td>\n",
1587
+ " </tr>\n",
1588
+ " <tr>\n",
1589
+ " <th>Liechtenstein</th>\n",
1590
+ " <td>Europe</td>\n",
1591
+ " <td>NaN</td>\n",
1592
+ " <td>NaN</td>\n",
1593
+ " <td>NaN</td>\n",
1594
+ " <td>NaN</td>\n",
1595
+ " <td>NaN</td>\n",
1596
+ " <td>NaN</td>\n",
1597
+ " <td>NaN</td>\n",
1598
+ " <td>NaN</td>\n",
1599
+ " <td>NaN</td>\n",
1600
+ " <td>NaN</td>\n",
1601
+ " </tr>\n",
1602
+ " <tr>\n",
1603
+ " <th>Macedonia</th>\n",
1604
+ " <td>Europe</td>\n",
1605
+ " <td>NaN</td>\n",
1606
+ " <td>NaN</td>\n",
1607
+ " <td>NaN</td>\n",
1608
+ " <td>NaN</td>\n",
1609
+ " <td>NaN</td>\n",
1610
+ " <td>NaN</td>\n",
1611
+ " <td>NaN</td>\n",
1612
+ " <td>NaN</td>\n",
1613
+ " <td>NaN</td>\n",
1614
+ " <td>NaN</td>\n",
1615
+ " </tr>\n",
1616
+ " <tr>\n",
1617
+ " <th>Monaco</th>\n",
1618
+ " <td>Europe</td>\n",
1619
+ " <td>NaN</td>\n",
1620
+ " <td>NaN</td>\n",
1621
+ " <td>NaN</td>\n",
1622
+ " <td>NaN</td>\n",
1623
+ " <td>NaN</td>\n",
1624
+ " <td>NaN</td>\n",
1625
+ " <td>NaN</td>\n",
1626
+ " <td>NaN</td>\n",
1627
+ " <td>NaN</td>\n",
1628
+ " <td>NaN</td>\n",
1629
+ " </tr>\n",
1630
+ " <tr>\n",
1631
+ " <th>San Marino</th>\n",
1632
+ " <td>Europe</td>\n",
1633
+ " <td>NaN</td>\n",
1634
+ " <td>NaN</td>\n",
1635
+ " <td>NaN</td>\n",
1636
+ " <td>NaN</td>\n",
1637
+ " <td>NaN</td>\n",
1638
+ " <td>NaN</td>\n",
1639
+ " <td>NaN</td>\n",
1640
+ " <td>NaN</td>\n",
1641
+ " <td>NaN</td>\n",
1642
+ " <td>NaN</td>\n",
1643
+ " </tr>\n",
1644
+ " <tr>\n",
1645
+ " <th>Vatican City</th>\n",
1646
+ " <td>Europe</td>\n",
1647
+ " <td>NaN</td>\n",
1648
+ " <td>NaN</td>\n",
1649
+ " <td>NaN</td>\n",
1650
+ " <td>NaN</td>\n",
1651
+ " <td>NaN</td>\n",
1652
+ " <td>NaN</td>\n",
1653
+ " <td>NaN</td>\n",
1654
+ " <td>NaN</td>\n",
1655
+ " <td>NaN</td>\n",
1656
+ " <td>NaN</td>\n",
1657
+ " </tr>\n",
1658
+ " <tr>\n",
1659
+ " <th>Antigua and Barbuda</th>\n",
1660
+ " <td>North America</td>\n",
1661
+ " <td>NaN</td>\n",
1662
+ " <td>NaN</td>\n",
1663
+ " <td>NaN</td>\n",
1664
+ " <td>NaN</td>\n",
1665
+ " <td>NaN</td>\n",
1666
+ " <td>NaN</td>\n",
1667
+ " <td>NaN</td>\n",
1668
+ " <td>NaN</td>\n",
1669
+ " <td>NaN</td>\n",
1670
+ " <td>NaN</td>\n",
1671
+ " </tr>\n",
1672
+ " <tr>\n",
1673
+ " <th>Bahamas</th>\n",
1674
+ " <td>North America</td>\n",
1675
+ " <td>NaN</td>\n",
1676
+ " <td>NaN</td>\n",
1677
+ " <td>NaN</td>\n",
1678
+ " <td>NaN</td>\n",
1679
+ " <td>NaN</td>\n",
1680
+ " <td>NaN</td>\n",
1681
+ " <td>NaN</td>\n",
1682
+ " <td>NaN</td>\n",
1683
+ " <td>NaN</td>\n",
1684
+ " <td>NaN</td>\n",
1685
+ " </tr>\n",
1686
+ " <tr>\n",
1687
+ " <th>Barbados</th>\n",
1688
+ " <td>North America</td>\n",
1689
+ " <td>NaN</td>\n",
1690
+ " <td>NaN</td>\n",
1691
+ " <td>NaN</td>\n",
1692
+ " <td>NaN</td>\n",
1693
+ " <td>NaN</td>\n",
1694
+ " <td>NaN</td>\n",
1695
+ " <td>NaN</td>\n",
1696
+ " <td>NaN</td>\n",
1697
+ " <td>NaN</td>\n",
1698
+ " <td>NaN</td>\n",
1699
+ " </tr>\n",
1700
+ " <tr>\n",
1701
+ " <th>Dominica</th>\n",
1702
+ " <td>North America</td>\n",
1703
+ " <td>NaN</td>\n",
1704
+ " <td>NaN</td>\n",
1705
+ " <td>NaN</td>\n",
1706
+ " <td>NaN</td>\n",
1707
+ " <td>NaN</td>\n",
1708
+ " <td>NaN</td>\n",
1709
+ " <td>NaN</td>\n",
1710
+ " <td>NaN</td>\n",
1711
+ " <td>NaN</td>\n",
1712
+ " <td>NaN</td>\n",
1713
+ " </tr>\n",
1714
+ " <tr>\n",
1715
+ " <th>Grenada</th>\n",
1716
+ " <td>North America</td>\n",
1717
+ " <td>NaN</td>\n",
1718
+ " <td>NaN</td>\n",
1719
+ " <td>NaN</td>\n",
1720
+ " <td>NaN</td>\n",
1721
+ " <td>NaN</td>\n",
1722
+ " <td>NaN</td>\n",
1723
+ " <td>NaN</td>\n",
1724
+ " <td>NaN</td>\n",
1725
+ " <td>NaN</td>\n",
1726
+ " <td>NaN</td>\n",
1727
+ " </tr>\n",
1728
+ " <tr>\n",
1729
+ " <th>Saint Kitts and Nevis</th>\n",
1730
+ " <td>North America</td>\n",
1731
+ " <td>NaN</td>\n",
1732
+ " <td>NaN</td>\n",
1733
+ " <td>NaN</td>\n",
1734
+ " <td>NaN</td>\n",
1735
+ " <td>NaN</td>\n",
1736
+ " <td>NaN</td>\n",
1737
+ " <td>NaN</td>\n",
1738
+ " <td>NaN</td>\n",
1739
+ " <td>NaN</td>\n",
1740
+ " <td>NaN</td>\n",
1741
+ " </tr>\n",
1742
+ " <tr>\n",
1743
+ " <th>Saint Lucia</th>\n",
1744
+ " <td>North America</td>\n",
1745
+ " <td>NaN</td>\n",
1746
+ " <td>NaN</td>\n",
1747
+ " <td>NaN</td>\n",
1748
+ " <td>NaN</td>\n",
1749
+ " <td>NaN</td>\n",
1750
+ " <td>NaN</td>\n",
1751
+ " <td>NaN</td>\n",
1752
+ " <td>NaN</td>\n",
1753
+ " <td>NaN</td>\n",
1754
+ " <td>NaN</td>\n",
1755
+ " </tr>\n",
1756
+ " <tr>\n",
1757
+ " <th>Saint Vincent and the Grenadines</th>\n",
1758
+ " <td>North America</td>\n",
1759
+ " <td>NaN</td>\n",
1760
+ " <td>NaN</td>\n",
1761
+ " <td>NaN</td>\n",
1762
+ " <td>NaN</td>\n",
1763
+ " <td>NaN</td>\n",
1764
+ " <td>NaN</td>\n",
1765
+ " <td>NaN</td>\n",
1766
+ " <td>NaN</td>\n",
1767
+ " <td>NaN</td>\n",
1768
+ " <td>NaN</td>\n",
1769
+ " </tr>\n",
1770
+ " <tr>\n",
1771
+ " <th>US</th>\n",
1772
+ " <td>North America</td>\n",
1773
+ " <td>NaN</td>\n",
1774
+ " <td>NaN</td>\n",
1775
+ " <td>NaN</td>\n",
1776
+ " <td>NaN</td>\n",
1777
+ " <td>NaN</td>\n",
1778
+ " <td>NaN</td>\n",
1779
+ " <td>NaN</td>\n",
1780
+ " <td>NaN</td>\n",
1781
+ " <td>NaN</td>\n",
1782
+ " <td>NaN</td>\n",
1783
+ " </tr>\n",
1784
+ " <tr>\n",
1785
+ " <th>Fiji</th>\n",
1786
+ " <td>Oceania</td>\n",
1787
+ " <td>NaN</td>\n",
1788
+ " <td>NaN</td>\n",
1789
+ " <td>NaN</td>\n",
1790
+ " <td>NaN</td>\n",
1791
+ " <td>NaN</td>\n",
1792
+ " <td>NaN</td>\n",
1793
+ " <td>NaN</td>\n",
1794
+ " <td>NaN</td>\n",
1795
+ " <td>NaN</td>\n",
1796
+ " <td>NaN</td>\n",
1797
+ " </tr>\n",
1798
+ " <tr>\n",
1799
+ " <th>Kiribati</th>\n",
1800
+ " <td>Oceania</td>\n",
1801
+ " <td>NaN</td>\n",
1802
+ " <td>NaN</td>\n",
1803
+ " <td>NaN</td>\n",
1804
+ " <td>NaN</td>\n",
1805
+ " <td>NaN</td>\n",
1806
+ " <td>NaN</td>\n",
1807
+ " <td>NaN</td>\n",
1808
+ " <td>NaN</td>\n",
1809
+ " <td>NaN</td>\n",
1810
+ " <td>NaN</td>\n",
1811
+ " </tr>\n",
1812
+ " <tr>\n",
1813
+ " <th>Marshall Islands</th>\n",
1814
+ " <td>Oceania</td>\n",
1815
+ " <td>NaN</td>\n",
1816
+ " <td>NaN</td>\n",
1817
+ " <td>NaN</td>\n",
1818
+ " <td>NaN</td>\n",
1819
+ " <td>NaN</td>\n",
1820
+ " <td>NaN</td>\n",
1821
+ " <td>NaN</td>\n",
1822
+ " <td>NaN</td>\n",
1823
+ " <td>NaN</td>\n",
1824
+ " <td>NaN</td>\n",
1825
+ " </tr>\n",
1826
+ " <tr>\n",
1827
+ " <th>Micronesia</th>\n",
1828
+ " <td>Oceania</td>\n",
1829
+ " <td>NaN</td>\n",
1830
+ " <td>NaN</td>\n",
1831
+ " <td>NaN</td>\n",
1832
+ " <td>NaN</td>\n",
1833
+ " <td>NaN</td>\n",
1834
+ " <td>NaN</td>\n",
1835
+ " <td>NaN</td>\n",
1836
+ " <td>NaN</td>\n",
1837
+ " <td>NaN</td>\n",
1838
+ " <td>NaN</td>\n",
1839
+ " </tr>\n",
1840
+ " <tr>\n",
1841
+ " <th>Nauru</th>\n",
1842
+ " <td>Oceania</td>\n",
1843
+ " <td>NaN</td>\n",
1844
+ " <td>NaN</td>\n",
1845
+ " <td>NaN</td>\n",
1846
+ " <td>NaN</td>\n",
1847
+ " <td>NaN</td>\n",
1848
+ " <td>NaN</td>\n",
1849
+ " <td>NaN</td>\n",
1850
+ " <td>NaN</td>\n",
1851
+ " <td>NaN</td>\n",
1852
+ " <td>NaN</td>\n",
1853
+ " </tr>\n",
1854
+ " <tr>\n",
1855
+ " <th>Palau</th>\n",
1856
+ " <td>Oceania</td>\n",
1857
+ " <td>NaN</td>\n",
1858
+ " <td>NaN</td>\n",
1859
+ " <td>NaN</td>\n",
1860
+ " <td>NaN</td>\n",
1861
+ " <td>NaN</td>\n",
1862
+ " <td>NaN</td>\n",
1863
+ " <td>NaN</td>\n",
1864
+ " <td>NaN</td>\n",
1865
+ " <td>NaN</td>\n",
1866
+ " <td>NaN</td>\n",
1867
+ " </tr>\n",
1868
+ " <tr>\n",
1869
+ " <th>Papua New Guinea</th>\n",
1870
+ " <td>Oceania</td>\n",
1871
+ " <td>NaN</td>\n",
1872
+ " <td>NaN</td>\n",
1873
+ " <td>NaN</td>\n",
1874
+ " <td>NaN</td>\n",
1875
+ " <td>NaN</td>\n",
1876
+ " <td>NaN</td>\n",
1877
+ " <td>NaN</td>\n",
1878
+ " <td>NaN</td>\n",
1879
+ " <td>NaN</td>\n",
1880
+ " <td>NaN</td>\n",
1881
+ " </tr>\n",
1882
+ " <tr>\n",
1883
+ " <th>Samoa</th>\n",
1884
+ " <td>Oceania</td>\n",
1885
+ " <td>NaN</td>\n",
1886
+ " <td>NaN</td>\n",
1887
+ " <td>NaN</td>\n",
1888
+ " <td>NaN</td>\n",
1889
+ " <td>NaN</td>\n",
1890
+ " <td>NaN</td>\n",
1891
+ " <td>NaN</td>\n",
1892
+ " <td>NaN</td>\n",
1893
+ " <td>NaN</td>\n",
1894
+ " <td>NaN</td>\n",
1895
+ " </tr>\n",
1896
+ " <tr>\n",
1897
+ " <th>Solomon Islands</th>\n",
1898
+ " <td>Oceania</td>\n",
1899
+ " <td>NaN</td>\n",
1900
+ " <td>NaN</td>\n",
1901
+ " <td>NaN</td>\n",
1902
+ " <td>NaN</td>\n",
1903
+ " <td>NaN</td>\n",
1904
+ " <td>NaN</td>\n",
1905
+ " <td>NaN</td>\n",
1906
+ " <td>NaN</td>\n",
1907
+ " <td>NaN</td>\n",
1908
+ " <td>NaN</td>\n",
1909
+ " </tr>\n",
1910
+ " <tr>\n",
1911
+ " <th>Tonga</th>\n",
1912
+ " <td>Oceania</td>\n",
1913
+ " <td>NaN</td>\n",
1914
+ " <td>NaN</td>\n",
1915
+ " <td>NaN</td>\n",
1916
+ " <td>NaN</td>\n",
1917
+ " <td>NaN</td>\n",
1918
+ " <td>NaN</td>\n",
1919
+ " <td>NaN</td>\n",
1920
+ " <td>NaN</td>\n",
1921
+ " <td>NaN</td>\n",
1922
+ " <td>NaN</td>\n",
1923
+ " </tr>\n",
1924
+ " <tr>\n",
1925
+ " <th>Tuvalu</th>\n",
1926
+ " <td>Oceania</td>\n",
1927
+ " <td>NaN</td>\n",
1928
+ " <td>NaN</td>\n",
1929
+ " <td>NaN</td>\n",
1930
+ " <td>NaN</td>\n",
1931
+ " <td>NaN</td>\n",
1932
+ " <td>NaN</td>\n",
1933
+ " <td>NaN</td>\n",
1934
+ " <td>NaN</td>\n",
1935
+ " <td>NaN</td>\n",
1936
+ " <td>NaN</td>\n",
1937
+ " </tr>\n",
1938
+ " <tr>\n",
1939
+ " <th>Vanuatu</th>\n",
1940
+ " <td>Oceania</td>\n",
1941
+ " <td>NaN</td>\n",
1942
+ " <td>NaN</td>\n",
1943
+ " <td>NaN</td>\n",
1944
+ " <td>NaN</td>\n",
1945
+ " <td>NaN</td>\n",
1946
+ " <td>NaN</td>\n",
1947
+ " <td>NaN</td>\n",
1948
+ " <td>NaN</td>\n",
1949
+ " <td>NaN</td>\n",
1950
+ " <td>NaN</td>\n",
1951
+ " </tr>\n",
1952
+ " </tbody>\n",
1953
+ "</table>\n",
1954
+ "</div>"
1955
+ ],
1956
+ "text/plain": [
1957
+ " Continent year Life Ladder \\\n",
1958
+ "Country \n",
1959
+ "Burkina Africa NaN NaN \n",
1960
+ "Cape Verde Africa NaN NaN \n",
1961
+ "Congo Africa NaN NaN \n",
1962
+ "Congo, Democratic Republic of Africa NaN NaN \n",
1963
+ "Equatorial Guinea Africa NaN NaN \n",
1964
+ "Eritrea Africa NaN NaN \n",
1965
+ "Guinea-Bissau Africa NaN NaN \n",
1966
+ "Sao Tome and Principe Africa NaN NaN \n",
1967
+ "Seychelles Africa NaN NaN \n",
1968
+ "Brunei Asia NaN NaN \n",
1969
+ "Burma (Myanmar) Asia NaN NaN \n",
1970
+ "East Timor Asia NaN NaN \n",
1971
+ "Korea, North Asia NaN NaN \n",
1972
+ "Korea, South Asia NaN NaN \n",
1973
+ "Russian Federation Asia NaN NaN \n",
1974
+ "Andorra Europe NaN NaN \n",
1975
+ "CZ Europe NaN NaN \n",
1976
+ "Liechtenstein Europe NaN NaN \n",
1977
+ "Macedonia Europe NaN NaN \n",
1978
+ "Monaco Europe NaN NaN \n",
1979
+ "San Marino Europe NaN NaN \n",
1980
+ "Vatican City Europe NaN NaN \n",
1981
+ "Antigua and Barbuda North America NaN NaN \n",
1982
+ "Bahamas North America NaN NaN \n",
1983
+ "Barbados North America NaN NaN \n",
1984
+ "Dominica North America NaN NaN \n",
1985
+ "Grenada North America NaN NaN \n",
1986
+ "Saint Kitts and Nevis North America NaN NaN \n",
1987
+ "Saint Lucia North America NaN NaN \n",
1988
+ "Saint Vincent and the Grenadines North America NaN NaN \n",
1989
+ "US North America NaN NaN \n",
1990
+ "Fiji Oceania NaN NaN \n",
1991
+ "Kiribati Oceania NaN NaN \n",
1992
+ "Marshall Islands Oceania NaN NaN \n",
1993
+ "Micronesia Oceania NaN NaN \n",
1994
+ "Nauru Oceania NaN NaN \n",
1995
+ "Palau Oceania NaN NaN \n",
1996
+ "Papua New Guinea Oceania NaN NaN \n",
1997
+ "Samoa Oceania NaN NaN \n",
1998
+ "Solomon Islands Oceania NaN NaN \n",
1999
+ "Tonga Oceania NaN NaN \n",
2000
+ "Tuvalu Oceania NaN NaN \n",
2001
+ "Vanuatu Oceania NaN NaN \n",
2002
+ "\n",
2003
+ " Log GDP per capita Social support \\\n",
2004
+ "Country \n",
2005
+ "Burkina NaN NaN \n",
2006
+ "Cape Verde NaN NaN \n",
2007
+ "Congo NaN NaN \n",
2008
+ "Congo, Democratic Republic of NaN NaN \n",
2009
+ "Equatorial Guinea NaN NaN \n",
2010
+ "Eritrea NaN NaN \n",
2011
+ "Guinea-Bissau NaN NaN \n",
2012
+ "Sao Tome and Principe NaN NaN \n",
2013
+ "Seychelles NaN NaN \n",
2014
+ "Brunei NaN NaN \n",
2015
+ "Burma (Myanmar) NaN NaN \n",
2016
+ "East Timor NaN NaN \n",
2017
+ "Korea, North NaN NaN \n",
2018
+ "Korea, South NaN NaN \n",
2019
+ "Russian Federation NaN NaN \n",
2020
+ "Andorra NaN NaN \n",
2021
+ "CZ NaN NaN \n",
2022
+ "Liechtenstein NaN NaN \n",
2023
+ "Macedonia NaN NaN \n",
2024
+ "Monaco NaN NaN \n",
2025
+ "San Marino NaN NaN \n",
2026
+ "Vatican City NaN NaN \n",
2027
+ "Antigua and Barbuda NaN NaN \n",
2028
+ "Bahamas NaN NaN \n",
2029
+ "Barbados NaN NaN \n",
2030
+ "Dominica NaN NaN \n",
2031
+ "Grenada NaN NaN \n",
2032
+ "Saint Kitts and Nevis NaN NaN \n",
2033
+ "Saint Lucia NaN NaN \n",
2034
+ "Saint Vincent and the Grenadines NaN NaN \n",
2035
+ "US NaN NaN \n",
2036
+ "Fiji NaN NaN \n",
2037
+ "Kiribati NaN NaN \n",
2038
+ "Marshall Islands NaN NaN \n",
2039
+ "Micronesia NaN NaN \n",
2040
+ "Nauru NaN NaN \n",
2041
+ "Palau NaN NaN \n",
2042
+ "Papua New Guinea NaN NaN \n",
2043
+ "Samoa NaN NaN \n",
2044
+ "Solomon Islands NaN NaN \n",
2045
+ "Tonga NaN NaN \n",
2046
+ "Tuvalu NaN NaN \n",
2047
+ "Vanuatu NaN NaN \n",
2048
+ "\n",
2049
+ " Healthy life expectancy at birth \\\n",
2050
+ "Country \n",
2051
+ "Burkina NaN \n",
2052
+ "Cape Verde NaN \n",
2053
+ "Congo NaN \n",
2054
+ "Congo, Democratic Republic of NaN \n",
2055
+ "Equatorial Guinea NaN \n",
2056
+ "Eritrea NaN \n",
2057
+ "Guinea-Bissau NaN \n",
2058
+ "Sao Tome and Principe NaN \n",
2059
+ "Seychelles NaN \n",
2060
+ "Brunei NaN \n",
2061
+ "Burma (Myanmar) NaN \n",
2062
+ "East Timor NaN \n",
2063
+ "Korea, North NaN \n",
2064
+ "Korea, South NaN \n",
2065
+ "Russian Federation NaN \n",
2066
+ "Andorra NaN \n",
2067
+ "CZ NaN \n",
2068
+ "Liechtenstein NaN \n",
2069
+ "Macedonia NaN \n",
2070
+ "Monaco NaN \n",
2071
+ "San Marino NaN \n",
2072
+ "Vatican City NaN \n",
2073
+ "Antigua and Barbuda NaN \n",
2074
+ "Bahamas NaN \n",
2075
+ "Barbados NaN \n",
2076
+ "Dominica NaN \n",
2077
+ "Grenada NaN \n",
2078
+ "Saint Kitts and Nevis NaN \n",
2079
+ "Saint Lucia NaN \n",
2080
+ "Saint Vincent and the Grenadines NaN \n",
2081
+ "US NaN \n",
2082
+ "Fiji NaN \n",
2083
+ "Kiribati NaN \n",
2084
+ "Marshall Islands NaN \n",
2085
+ "Micronesia NaN \n",
2086
+ "Nauru NaN \n",
2087
+ "Palau NaN \n",
2088
+ "Papua New Guinea NaN \n",
2089
+ "Samoa NaN \n",
2090
+ "Solomon Islands NaN \n",
2091
+ "Tonga NaN \n",
2092
+ "Tuvalu NaN \n",
2093
+ "Vanuatu NaN \n",
2094
+ "\n",
2095
+ " Freedom to make life choices Generosity \\\n",
2096
+ "Country \n",
2097
+ "Burkina NaN NaN \n",
2098
+ "Cape Verde NaN NaN \n",
2099
+ "Congo NaN NaN \n",
2100
+ "Congo, Democratic Republic of NaN NaN \n",
2101
+ "Equatorial Guinea NaN NaN \n",
2102
+ "Eritrea NaN NaN \n",
2103
+ "Guinea-Bissau NaN NaN \n",
2104
+ "Sao Tome and Principe NaN NaN \n",
2105
+ "Seychelles NaN NaN \n",
2106
+ "Brunei NaN NaN \n",
2107
+ "Burma (Myanmar) NaN NaN \n",
2108
+ "East Timor NaN NaN \n",
2109
+ "Korea, North NaN NaN \n",
2110
+ "Korea, South NaN NaN \n",
2111
+ "Russian Federation NaN NaN \n",
2112
+ "Andorra NaN NaN \n",
2113
+ "CZ NaN NaN \n",
2114
+ "Liechtenstein NaN NaN \n",
2115
+ "Macedonia NaN NaN \n",
2116
+ "Monaco NaN NaN \n",
2117
+ "San Marino NaN NaN \n",
2118
+ "Vatican City NaN NaN \n",
2119
+ "Antigua and Barbuda NaN NaN \n",
2120
+ "Bahamas NaN NaN \n",
2121
+ "Barbados NaN NaN \n",
2122
+ "Dominica NaN NaN \n",
2123
+ "Grenada NaN NaN \n",
2124
+ "Saint Kitts and Nevis NaN NaN \n",
2125
+ "Saint Lucia NaN NaN \n",
2126
+ "Saint Vincent and the Grenadines NaN NaN \n",
2127
+ "US NaN NaN \n",
2128
+ "Fiji NaN NaN \n",
2129
+ "Kiribati NaN NaN \n",
2130
+ "Marshall Islands NaN NaN \n",
2131
+ "Micronesia NaN NaN \n",
2132
+ "Nauru NaN NaN \n",
2133
+ "Palau NaN NaN \n",
2134
+ "Papua New Guinea NaN NaN \n",
2135
+ "Samoa NaN NaN \n",
2136
+ "Solomon Islands NaN NaN \n",
2137
+ "Tonga NaN NaN \n",
2138
+ "Tuvalu NaN NaN \n",
2139
+ "Vanuatu NaN NaN \n",
2140
+ "\n",
2141
+ " Perceptions of corruption Positive affect \\\n",
2142
+ "Country \n",
2143
+ "Burkina NaN NaN \n",
2144
+ "Cape Verde NaN NaN \n",
2145
+ "Congo NaN NaN \n",
2146
+ "Congo, Democratic Republic of NaN NaN \n",
2147
+ "Equatorial Guinea NaN NaN \n",
2148
+ "Eritrea NaN NaN \n",
2149
+ "Guinea-Bissau NaN NaN \n",
2150
+ "Sao Tome and Principe NaN NaN \n",
2151
+ "Seychelles NaN NaN \n",
2152
+ "Brunei NaN NaN \n",
2153
+ "Burma (Myanmar) NaN NaN \n",
2154
+ "East Timor NaN NaN \n",
2155
+ "Korea, North NaN NaN \n",
2156
+ "Korea, South NaN NaN \n",
2157
+ "Russian Federation NaN NaN \n",
2158
+ "Andorra NaN NaN \n",
2159
+ "CZ NaN NaN \n",
2160
+ "Liechtenstein NaN NaN \n",
2161
+ "Macedonia NaN NaN \n",
2162
+ "Monaco NaN NaN \n",
2163
+ "San Marino NaN NaN \n",
2164
+ "Vatican City NaN NaN \n",
2165
+ "Antigua and Barbuda NaN NaN \n",
2166
+ "Bahamas NaN NaN \n",
2167
+ "Barbados NaN NaN \n",
2168
+ "Dominica NaN NaN \n",
2169
+ "Grenada NaN NaN \n",
2170
+ "Saint Kitts and Nevis NaN NaN \n",
2171
+ "Saint Lucia NaN NaN \n",
2172
+ "Saint Vincent and the Grenadines NaN NaN \n",
2173
+ "US NaN NaN \n",
2174
+ "Fiji NaN NaN \n",
2175
+ "Kiribati NaN NaN \n",
2176
+ "Marshall Islands NaN NaN \n",
2177
+ "Micronesia NaN NaN \n",
2178
+ "Nauru NaN NaN \n",
2179
+ "Palau NaN NaN \n",
2180
+ "Papua New Guinea NaN NaN \n",
2181
+ "Samoa NaN NaN \n",
2182
+ "Solomon Islands NaN NaN \n",
2183
+ "Tonga NaN NaN \n",
2184
+ "Tuvalu NaN NaN \n",
2185
+ "Vanuatu NaN NaN \n",
2186
+ "\n",
2187
+ " Negative affect \n",
2188
+ "Country \n",
2189
+ "Burkina NaN \n",
2190
+ "Cape Verde NaN \n",
2191
+ "Congo NaN \n",
2192
+ "Congo, Democratic Republic of NaN \n",
2193
+ "Equatorial Guinea NaN \n",
2194
+ "Eritrea NaN \n",
2195
+ "Guinea-Bissau NaN \n",
2196
+ "Sao Tome and Principe NaN \n",
2197
+ "Seychelles NaN \n",
2198
+ "Brunei NaN \n",
2199
+ "Burma (Myanmar) NaN \n",
2200
+ "East Timor NaN \n",
2201
+ "Korea, North NaN \n",
2202
+ "Korea, South NaN \n",
2203
+ "Russian Federation NaN \n",
2204
+ "Andorra NaN \n",
2205
+ "CZ NaN \n",
2206
+ "Liechtenstein NaN \n",
2207
+ "Macedonia NaN \n",
2208
+ "Monaco NaN \n",
2209
+ "San Marino NaN \n",
2210
+ "Vatican City NaN \n",
2211
+ "Antigua and Barbuda NaN \n",
2212
+ "Bahamas NaN \n",
2213
+ "Barbados NaN \n",
2214
+ "Dominica NaN \n",
2215
+ "Grenada NaN \n",
2216
+ "Saint Kitts and Nevis NaN \n",
2217
+ "Saint Lucia NaN \n",
2218
+ "Saint Vincent and the Grenadines NaN \n",
2219
+ "US NaN \n",
2220
+ "Fiji NaN \n",
2221
+ "Kiribati NaN \n",
2222
+ "Marshall Islands NaN \n",
2223
+ "Micronesia NaN \n",
2224
+ "Nauru NaN \n",
2225
+ "Palau NaN \n",
2226
+ "Papua New Guinea NaN \n",
2227
+ "Samoa NaN \n",
2228
+ "Solomon Islands NaN \n",
2229
+ "Tonga NaN \n",
2230
+ "Tuvalu NaN \n",
2231
+ "Vanuatu NaN "
2232
+ ]
2233
+ },
2234
+ "execution_count": 88,
2235
+ "metadata": {},
2236
+ "output_type": "execute_result"
2237
+ }
2238
+ ],
2239
+ "source": [
2240
+ "df3"
2241
+ ]
2242
+ },
2243
+ {
2244
+ "cell_type": "markdown",
2245
+ "id": "db01b828-d1b1-4708-b6bd-3b2dbed54746",
2246
+ "metadata": {},
2247
+ "source": [
2248
+ "> Note that I updated these in the spreadsheet manually with Excel because it was faster to do it by hand... I should go back when I have time to do it programmatically..."
2249
+ ]
2250
+ }
2251
+ ],
2252
+ "metadata": {
2253
+ "kernelspec": {
2254
+ "display_name": "Python 3 (ipykernel)",
2255
+ "language": "python",
2256
+ "name": "python3"
2257
+ },
2258
+ "language_info": {
2259
+ "codemirror_mode": {
2260
+ "name": "ipython",
2261
+ "version": 3
2262
+ },
2263
+ "file_extension": ".py",
2264
+ "mimetype": "text/x-python",
2265
+ "name": "python",
2266
+ "nbconvert_exporter": "python",
2267
+ "pygments_lexer": "ipython3",
2268
+ "version": "3.8.8"
2269
+ }
2270
+ },
2271
+ "nbformat": 4,
2272
+ "nbformat_minor": 5
2273
+ }
Assets/Countries/combined-countries.csv ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Words,year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,Negative affect,Categories
2
+ Afghanistan,2019,2.375091791,7.697247982,0.419972867,52.40000153,0.393656164,-0.108458869,0.923849106,0.351387054,0.502473712,Asia
3
+ Albania,2020,5.364909649,9.497251511,0.710115016,69.30000305,0.75367105,0.006968025,0.891358972,0.678661227,0.265066117,Europe
4
+ Algeria,2019,4.744627476,9.336946487,0.803258657,66.09999847,0.385083437,0.00508652,0.740609348,0.584944308,0.215197757,Africa
5
+ Andorra,,,,,,,,,,,Europe
6
+ Angola,2014,3.794837952,9.016735077,0.754615486,54.59999847,0.374541551,-0.167722687,0.83407563,0.578517139,0.367864132,Africa
7
+ Antigua and Barbuda,,,,,,,,,,,North America
8
+ Argentina,2020,5.900567055,9.850449562,0.897103846,69.19999695,0.823391616,-0.122354329,0.815780461,0.763523877,0.342496932,South America
9
+ Armenia,2019,5.4880867,9.521769524,0.781603873,67.19999695,0.844324112,-0.172368988,0.583472729,0.598237813,0.430463403,Europe
10
+ Australia,2020,7.137367725,10.75986385,0.936517,74.19999695,0.905282974,0.210030302,0.491094828,0.769181728,0.205077678,Oceania
11
+ Austria,2020,7.213489056,10.85111809,0.924831212,73.59999847,0.911909878,0.011031743,0.463830173,0.769316614,0.206499651,Europe
12
+ Azerbaijan,2019,5.173389435,9.575250626,0.88675642,65.80000305,0.854248524,-0.214162916,0.457260668,0.642546833,0.163920254,Europe
13
+ Bahamas,,,,,,,,,,,North America
14
+ Bahrain,2020,6.173175812,10.61990356,0.847745061,69.69999695,0.94523257,0.132441044,,0.789794981,0.296835452,Asia
15
+ Bangladesh,2020,5.279986858,8.472194672,0.739337921,65.30000305,0.777467191,-0.008851291,0.741659164,0.582380831,0.331708789,Asia
16
+ Barbados,,,,,,,,,,,North America
17
+ Belarus,2019,5.821453094,9.860038757,0.916740477,66.40000153,0.656933606,-0.185933307,0.545904756,0.590850592,0.189821407,Europe
18
+ Belgium,2020,6.838760853,10.77053738,0.903558671,72.40000153,0.766917825,-0.163784489,0.633626759,0.646510303,0.260188788,Europe
19
+ Belize,2014,5.955646515,8.883127213,0.756932497,62.22000122,0.873569071,0.021995628,0.782105386,0.754977345,0.281604409,North America
20
+ Benin,2020,4.407745838,8.102292061,0.506636083,55.09999847,0.783114672,-0.08348871,0.531883657,0.608584642,0.304512441,Africa
21
+ Bhutan,2015,5.082128525,9.218923569,0.847574413,60.20000076,0.83010155,0.277412355,0.633955777,0.80964148,0.311589301,Asia
22
+ Bolivia,2020,5.559258938,8.997989655,0.804810882,64.19999695,0.877031922,-0.053763788,0.868208289,0.789818466,0.381791174,South America
23
+ Bosnia and Herzegovina,2020,5.515816212,9.58334446,0.898518682,68.40000153,0.740250826,0.137954175,0.916052163,0.644237339,0.325412303,Europe
24
+ Botswana,2019,3.471084833,9.785069466,0.773667216,59.59999847,0.832542658,-0.239000931,0.792079508,0.711796343,0.272721767,Africa
25
+ Brazil,2020,6.109717846,9.522140503,0.830832124,66.80000305,0.786235094,-0.052820019,0.728772223,0.692023873,0.38913855,South America
26
+ Brunei,,,,,,,,,,,Asia
27
+ Bulgaria,2020,5.597723007,9.990657806,0.916242361,67.19999695,0.818224788,-0.004322314,0.900632977,0.705834627,0.221351057,Europe
28
+ Burkina Faso,2019,4.740892887,7.691488266,0.683102369,54.40000153,0.677546859,-0.004089894,0.729396582,0.690925896,0.364775389,Africa
29
+ Burundi,2018,3.775283098,6.635322094,0.484715223,53.40000153,0.646398604,-0.023876166,0.598607659,0.6664415,0.362766594,Africa
30
+ Cambodia,2020,4.376985073,8.361935616,0.724422634,62.40000153,0.963075459,0.052429765,0.863053977,0.877953529,0.38985163,Asia
31
+ Cameroon,2020,5.2410779,8.17463398,0.720046639,54.29999924,0.674509168,0.049266182,0.836517215,0.629614651,0.386478961,Africa
32
+ Canada,2020,7.024904728,10.72951412,0.930610716,74,0.8868922,0.049636856,0.434012353,0.795948744,0.306673735,North America
33
+ Central African Republic,2017,3.475862026,6.81651926,0.319589138,45.20000076,0.645252347,0.0727861,0.889566004,0.613865197,0.599335492,Africa
34
+ Chad,2019,4.250799179,7.364943981,0.640452087,48.70000076,0.537245691,0.055000938,0.832283497,0.587211192,0.460061282,Africa
35
+ Chile,2020,6.150642872,10.0201416,0.888412297,70.09999847,0.781383574,0.032990757,0.811818838,0.814602733,0.336028606,South America
36
+ China,2020,5.771064758,9.70175457,0.80833447,69.90000153,0.891122997,-0.103214338,,0.789345384,0.244918227,Asia
37
+ Colombia,2020,5.70917511,9.495491028,0.797035217,68.30000305,0.840186119,-0.084642209,0.807964027,0.795132697,0.340158582,South America
38
+ Comoros,2019,4.608616352,8.03313446,0.632012963,57.5,0.538261533,0.077253081,0.762232482,0.736221731,0.336162895,Africa
39
+ Congo (Brazzaville),2019,5.212622643,8.101092339,0.624768078,58.5,0.686451972,-0.046051238,0.74058944,0.645253956,0.40504083,Africa
40
+ Congo (Kinshasa),2017,4.311033249,6.965845585,0.669688404,52.90000153,0.704239547,0.068378173,0.809181869,0.550525904,0.404262066,Africa
41
+ Costa Rica,2019,6.997618675,9.885446548,0.906077445,71.5,0.926830113,-0.145994335,0.835628331,0.848347604,0.303327233,North America
42
+ Croatia,2020,6.507992268,10.16581726,0.922913492,71.40000153,0.836657643,-0.062968105,0.960939288,0.742780507,0.285609752,Europe
43
+ Cuba,2006,5.417868614,,0.969595134,68.44000244,0.281457931,,,0.646711767,0.276601523,North America
44
+ Cyprus,2020,6.259810448,,0.805559397,74.09999847,0.762782335,,0.816231728,0.758863032,0.283522457,Europe
45
+ North Cyprus,2019,5.4666152,,0.803294539,,0.792734623,,0.640058875,0.493692875,0.296411127,Asia
46
+ Czech Republic,2020,6.897091389,10.5301342,0.96405369,71.30000305,0.906422019,-0.127022371,0.883699596,0.832057655,0.290441692,Europe
47
+ Denmark,2020,7.514631271,10.90999508,0.947371364,73,0.937931836,0.052293025,0.213841751,0.81766367,0.227101892,Europe
48
+ Djibouti,2011,4.369193554,7.880099297,0.632973254,54.70000076,0.746439457,-0.057318915,0.518930137,0.579302847,0.180592626,Africa
49
+ Dominican Republic,2020,5.168409824,9.802446365,0.806117654,66.40000153,0.834642947,-0.127834037,0.636116564,0.73386693,0.313928306,North America
50
+ East Timor,,,,,,,,,,,Asia
51
+ Ecuador,2020,5.35446167,9.243865013,0.804008543,69.09999847,0.828511536,-0.157090038,0.854780495,0.789940715,0.416027963,South America
52
+ Egypt,2020,4.472396851,9.382726669,0.672725499,62.29999924,0.769550323,-0.112341978,,0.598908663,0.442033589,Africa
53
+ El Salvador,2020,5.461926937,9.018845558,0.695624352,66.69999695,0.923944831,-0.1264745,0.583036363,0.838904202,0.329439789,North America
54
+ Equatorial Guinea,,,,,,,,,,,Africa
55
+ Eritrea,,,,,,,,,,,Africa
56
+ Estonia,2020,6.452563763,10.4585886,0.957770467,69,0.954200566,-0.082279153,0.397834778,0.806923807,0.187679499,Europe
57
+ Ethiopia,2020,4.549219608,7.7109828,0.823137581,59.5,0.768694282,0.188496858,0.783822417,0.669388652,0.251514345,Africa
58
+ Fiji,,,,,,,,,,,Oceania
59
+ Finland,2020,7.889349937,10.75044632,0.961620748,72.09999847,0.962423682,-0.115531988,0.163635895,0.7442922,0.192897573,Europe
60
+ France,2020,6.714111805,10.64328003,0.947354019,74.19999695,0.823386312,-0.168960527,0.564640582,0.731813908,0.23095043,Europe
61
+ Gabon,2019,4.914393425,9.607087135,0.763051689,60.20000076,0.736349881,-0.202519819,0.84625423,0.692702413,0.412960976,Africa
62
+ Gambia,2019,5.163627148,7.69934988,0.693870127,55.29999924,0.676595271,0.410180479,0.798108101,0.772816181,0.400723279,Africa
63
+ Georgia,2020,5.123143196,9.569304466,0.71834594,64.09999847,0.764352381,-0.221125469,0.582734704,0.610894918,0.294512063,Europe
64
+ Germany,2020,7.311897755,10.83349895,0.905080497,72.80000305,0.864356041,-0.06004804,0.424088776,0.759594321,0.205927119,Europe
65
+ Ghana,2020,5.31948328,8.589605331,0.642703354,58,0.823720038,0.199632064,0.847024918,0.712765932,0.252728432,Africa
66
+ Greece,2020,5.787615776,10.21457958,0.778536558,72.80000305,0.56461364,-0.24080646,0.764324546,0.684457839,0.321684211,Europe
67
+ Grenada,,,,,,,,,,,North America
68
+ Guatemala,2019,6.262175083,9.063875198,0.774074376,65.09999847,0.90067631,-0.062302988,0.772577941,0.85941267,0.310789257,North America
69
+ Guinea,2019,4.76768446,7.849340439,0.655124187,55.5,0.691399097,0.09681724,0.755585492,0.684646904,0.473388433,Africa
70
+ Guyana,2007,5.992826462,8.773288727,0.848765194,57.25999832,0.694005668,0.110037036,0.835569084,0.767540574,0.29641977,South America
71
+ Haiti,2018,3.614928007,7.477138042,0.537975907,55.70000076,0.591468394,0.421520352,0.720444739,0.5841133,0.358720034,North America
72
+ Honduras,2019,5.930051327,8.65311718,0.797148347,67.40000153,0.846190035,0.062708922,0.814962924,0.849954963,0.278882086,North America
73
+ Hong Kong,2020,5.295341492,,0.812942982,,0.705452263,,0.380351216,0.608647346,0.210313618,Asia
74
+ Hungary,2020,6.038049698,10.33514786,0.943400383,68.40000153,0.77096808,-0.120404616,0.836105108,0.735238373,0.24005194,Europe
75
+ Iceland,2020,7.575489521,10.82420063,0.983286083,73,0.948627174,0.160273999,0.64406389,0.863017619,0.171795145,Europe
76
+ India,2020,4.225281239,8.702772141,0.616639256,60.90000153,0.906391323,0.074823797,0.780124009,0.752433956,0.383162528,Asia
77
+ Indonesia,2019,5.346512794,9.376888275,0.80191803,62.29999924,0.865859151,0.555348039,0.860784769,0.876714051,0.301702797,Asia
78
+ Iran,2020,4.864528179,,0.757218659,66.59999847,0.599594474,,0.70990169,0.582420528,0.470245004,Asia
79
+ Iraq,2020,4.78516531,9.167185783,0.707847476,61.40000153,0.700214565,-0.020748287,0.849108756,0.644464254,0.531538904,Asia
80
+ Ireland,2020,7.034930706,11.3228035,0.960311055,72.5,0.882098258,0.013816552,0.355632722,0.796661019,0.246447265,Europe
81
+ Israel,2020,7.194928169,10.53805351,0.959072173,73.69999695,0.831315815,-0.049371675,0.74763906,0.62139833,0.242825732,Asia
82
+ Italy,2020,6.488356113,10.56257153,0.889824033,74,0.718155444,-0.149937257,0.844094574,0.670213342,0.311002165,Europe
83
+ Ivory Coast,2020,5.256503582,8.564923286,0.61310631,50.70000076,0.769998014,0.015563689,0.776687264,0.692646921,0.33991909,Africa
84
+ Jamaica,2019,6.309238911,9.186201096,0.877814472,67.5,0.890670836,-0.136797056,0.885330021,0.752041101,0.195284143,North America
85
+ Japan,2020,6.117963314,10.57954788,0.887249112,75.19999695,0.806036115,-0.258745283,0.608698547,0.74246943,0.186461002,Asia
86
+ Jordan,2020,4.093991756,9.14999485,0.708839893,67.19999695,0.778533459,-0.149825886,,,,Asia
87
+ Kazakhstan,2020,6.168269157,10.13533592,0.966448963,65.80000305,0.872100115,-0.056175169,0.660798848,0.684102654,0.150359914,Asia
88
+ Kenya,2020,4.546584129,8.365282059,0.673717618,61.29999924,0.702034473,0.259969592,0.836516023,0.733434856,0.296980411,Africa
89
+ Kiribati,,,,,,,,,,,Oceania
90
+ Kosovo,2020,6.294414043,,0.792374492,,0.879837573,,0.90989387,0.72623986,0.201458037,Europe
91
+ Kuwait,2019,6.106119633,10.81669617,0.841519773,66.90000153,0.867273808,-0.104161076,,0.695362747,0.302876323,Asia
92
+ Kyrgyzstan,2020,6.249586105,8.503411293,0.902222991,64.69999695,0.934885323,0.102865741,0.931317508,0.803025365,0.257813066,Asia
93
+ Laos,2020,5.284390926,8.959955215,0.660396278,59.5,0.915028214,0.141430691,0.747997701,0.821680248,0.358349264,Asia
94
+ Latvia,2020,6.229008675,10.29959011,0.928012192,67.40000153,0.820111692,-0.077660471,0.808821976,0.713628411,0.201582372,Europe
95
+ Lebanon,2019,4.024219513,9.596782684,0.865968525,67.59999847,0.447001487,-0.081082396,0.890415609,0.321689755,0.494499028,Asia
96
+ Lesotho,2019,3.5117805,7.925776958,0.789705396,48.70000076,0.716313541,-0.130536228,0.914951444,0.734879911,0.273425519,Africa
97
+ Liberia,2019,5.121460915,7.263903618,0.71247375,56.90000153,0.705874562,0.050611626,0.828468978,0.635608971,0.389132589,Africa
98
+ Libya,2019,5.33022213,9.627349854,0.826719344,62.29999924,0.761964321,-0.072672851,0.68641299,0.70874089,0.400737435,Africa
99
+ Liechtenstein,,,,,,,,,,,Europe
100
+ Lithuania,2020,6.39137888,10.5036068,0.952544093,68.5,0.824060559,-0.121781312,0.829204798,0.660229564,0.201912001,Europe
101
+ Luxembourg,2019,7.404015541,11.64816856,0.912104547,72.59999847,0.930321217,-0.045057613,0.389598429,0.789186358,0.211639807,Europe
102
+ Macedonia,,,,,,,,,,,Europe
103
+ Madagascar,2019,4.339087486,7.406237125,0.700610101,59.5,0.549535215,-0.012468655,0.719982684,0.723194659,0.303959668,Africa
104
+ Malawi,2019,3.869123697,6.965763092,0.548956096,58.29999924,0.764864206,0.003596819,0.680247962,0.53669703,0.348162442,Africa
105
+ Malaysia,2019,5.427954197,10.25240326,0.842498839,67.19999695,0.915778697,0.123324133,0.781943917,0.834177494,0.176071689,Asia
106
+ Maldives,2018,5.197574615,9.825985909,0.913315058,70.59999847,0.854759276,0.023997834,,,,Asia
107
+ Mali,2019,4.98799181,7.752494812,0.754558086,52.20000076,0.67040509,-0.037851758,0.846340001,0.711522698,0.357764512,Africa
108
+ Malta,2020,6.156822681,,0.937920272,72.19999695,0.930600464,,0.67462635,0.601495862,0.410913229,Europe
109
+ Marshall Islands,,,,,,,,,,,Oceania
110
+ Mauritania,2019,4.152619362,8.5558424,0.798101962,57.29999924,0.627505183,-0.101856656,0.742890298,0.69183147,0.259738505,Africa
111
+ Mauritius,2020,6.015300274,9.972017288,0.892565966,67,0.842598081,-0.03669272,0.771790087,0.766984463,0.138401791,Africa
112
+ Mexico,2020,5.964221001,9.782189369,0.778816223,68.90000153,0.873346984,-0.119389862,0.778165877,0.810109138,0.29155612,North America
113
+ Micronesia,,,,,,,,,,,Oceania
114
+ Moldova,2020,5.811628819,9.462109566,0.874061763,66.40000153,0.859083235,-0.058278579,0.941438973,0.727224529,0.267836064,Europe
115
+ Monaco,,,,,,,,,,,Europe
116
+ Mongolia,2020,6.011364937,9.395559311,0.917789161,62.70000076,0.718491018,0.141357452,0.842827678,0.636443496,0.259983033,Asia
117
+ Montenegro,2020,5.722162724,9.912668228,0.887129486,68.90000153,0.801855087,0.059815772,0.844687104,0.60328269,0.411377817,Europe
118
+ Morocco,2020,4.80261755,8.87091732,0.552520096,66.5,0.818995237,-0.228577554,0.802740276,0.587182403,0.256431192,Africa
119
+ Mozambique,2019,4.932132721,7.154966831,0.742303729,55.20000076,0.869810224,0.072745018,0.681900442,0.58727473,0.384122759,Africa
120
+ Myanmar,2020,4.431364059,8.55391407,0.795763254,59.59999847,0.824870706,0.470258176,0.646702111,0.799749196,0.289218217,Asia
121
+ Namibia,2020,4.451010227,9.104139328,0.740570307,57.09999847,0.665681958,-0.103880182,0.810354829,0.647919536,0.247542083,Africa
122
+ Nauru,,,,,,,,,,,Oceania
123
+ Nepal,2019,5.448724747,8.136457443,0.772273064,64.59999847,0.790347695,0.166975796,0.711842477,0.535798132,0.357100308,Asia
124
+ Netherlands,2020,7.504447937,10.9005003,0.943956137,72.5,0.934522629,0.151298046,0.280604511,0.783990622,0.246511325,Europe
125
+ New Zealand,2020,7.257381916,10.60045719,0.951990783,73.59999847,0.918154597,0.125259653,0.282767951,0.849415004,0.208541051,Oceania
126
+ Nicaragua,2019,6.112545013,8.595469475,0.873863935,67.80000305,0.882678449,0.029247265,0.62198174,0.83542347,0.337012976,North America
127
+ Niger,2019,5.003544331,7.105849266,0.67695874,54,0.83136189,0.02595989,0.728855133,0.815915167,0.304438263,Africa
128
+ Nigeria,2020,5.502948284,8.484203339,0.739289463,50.5,0.713061512,0.099404059,0.912774444,0.743977726,0.315886825,Africa
129
+ North Korea,,,,,,,,,,,Asia
130
+ North Macedonia,2020,5.053664207,9.690014839,0.750374198,65.55988312,0.787284732,0.131274343,0.877421141,0.604626834,0.365126073,Europe
131
+ Norway,2020,7.290032387,11.04216003,0.955979943,73.40000153,0.964561105,0.075148538,0.271083295,0.823093832,0.216033921,Europe
132
+ Oman,2011,6.852982044,10.38246155,,65.5,0.916293025,0.024908492,,,0.295164108,Asia
133
+ Pakistan,2019,4.442717552,8.453290939,0.617295742,58.90000153,0.684675574,0.123729475,0.775998056,0.581067383,0.424240083,Asia
134
+ Palau,,,,,,,,,,,Oceania
135
+ Palestinian Territories,2019,4.48253727,,0.832550049,,0.653488278,,0.829282761,0.62517643,0.3996723,Asia
136
+ Panama,2019,6.085955143,10.35643101,0.885721385,69.69999695,0.882961094,-0.198984995,0.868827522,0.877561629,0.243566602,North America
137
+ Papua New Guinea,,,,,,,,,,,Oceania
138
+ Paraguay,2019,5.652625561,9.448143959,0.892487168,65.90000153,0.876052618,0.028112838,0.881786108,0.85772413,0.275186718,South America
139
+ Peru,2019,5.999381542,9.460934639,0.809075952,68.40000153,0.814805925,-0.129735783,0.873601913,0.820448101,0.374985486,South America
140
+ Philippines,2020,5.079585075,9.061443329,0.781140387,62.09999847,0.932041705,-0.115542881,0.744283676,0.803562105,0.326889008,Asia
141
+ Poland,2020,6.139455318,10.37120342,0.95317173,70.09999847,0.767428696,-0.006559356,0.786873639,0.759842575,0.328937918,Europe
142
+ Portugal,2020,5.767792225,10.37082005,0.874990344,72.80000305,0.91313076,-0.238090202,0.867157161,0.647768855,0.382812679,Europe
143
+ Qatar,2015,6.374529362,11.48561478,,68.30000305,,,,,,Asia
144
+ Romania,2019,6.129942417,10.30591393,0.841905951,67.5,0.84754318,-0.221422106,0.954130709,0.697443366,0.243659228,Europe
145
+ Russia,2020,5.495288849,10.16223526,0.887020171,65.09999847,0.714466453,-0.070612296,0.823047519,0.645214975,0.189521536,Asia
146
+ Rwanda,2019,3.268152237,7.708060741,0.489458233,61.70000076,0.868999183,0.064065881,0.167970896,0.736067951,0.417667687,Africa
147
+ Saint Kitts and Nevis,,,,,,,,,,,North America
148
+ Saint Lucia,,,,,,,,,,,North America
149
+ Saint Vincent and the Grenadines,,,,,,,,,,,North America
150
+ Samoa,,,,,,,,,,,Oceania
151
+ San Marino,,,,,,,,,,,Europe
152
+ Sao Tome and Principe,,,,,,,,,,,Africa
153
+ Saudi Arabia,2020,6.559588432,10.70066261,0.890255928,66.90000153,0.884220123,-0.11053171,,0.753607631,0.251199067,Asia
154
+ Senegal,2019,5.488736629,8.130020142,0.687614083,60,0.758841753,-0.018803915,0.79567343,0.788973033,0.331925839,Africa
155
+ Serbia,2020,6.041546345,9.788259506,0.852101862,69,0.843479872,0.149401307,0.824472487,0.602846146,0.357580274,Europe
156
+ Seychelles,,,,,,,,,,,Africa
157
+ Sierra Leone,2019,3.447381496,7.449131966,0.610779762,52.40000153,0.717769563,0.074055701,0.873861432,0.513375223,0.438134462,Africa
158
+ Singapore,2019,6.378359795,11.48598003,0.924918354,77.09999847,0.938041747,0.027229678,0.069619603,0.722598016,0.138069153,Asia
159
+ Slovakia,2020,6.519098282,10.33151245,0.954159975,69.5,0.76189661,-0.074873514,0.900533676,0.763582885,0.274447888,Europe
160
+ Slovenia,2020,6.462076187,10.47786999,0.953437507,71.69999695,0.958442569,-0.08135689,0.796557486,0.609949231,0.313852519,Europe
161
+ Solomon Islands,,,,,,,,,,,Oceania
162
+ Somalia,2016,4.667941093,,0.594416559,50,0.917322814,,0.44080174,0.891423166,0.193282232,Africa
163
+ South Africa,2020,4.946800709,9.332463264,0.891050339,57.29999924,0.756946266,-0.014951312,0.912407219,0.820337772,0.294276476,Africa
164
+ South Korea,2020,5.792695522,10.64807415,0.807952285,74.19999695,0.711480439,-0.105867893,0.664694011,0.639555693,0.247059658,Asia
165
+ South Sudan,2017,2.816622496,,0.556822658,51,0.456011087,,0.761269629,0.585602164,0.517363787,Africa
166
+ Spain,2020,6.502175331,10.48805904,0.934934676,75,0.783256531,-0.120613314,0.729977489,0.686177611,0.316617101,Europe
167
+ Sri Lanka,2019,4.213299274,9.478693962,0.814939141,67.40000153,0.824277341,0.051186614,0.863342285,0.816390395,0.314542711,Asia
168
+ Sudan,2014,4.138672829,8.3170681,0.81061554,55.11999893,0.3900958,-0.063394643,0.793785036,0.540845037,0.302724987,Africa
169
+ Suriname,2012,6.269286633,9.797084808,0.797262073,62.24000168,0.885488451,-0.077173166,0.751282871,0.764222682,0.250364989,South America
170
+ Swaziland,2019,4.396114826,9.069709778,0.759097695,51.27039337,0.596682429,-0.190737918,0.723507762,0.777627289,0.279595166,Africa
171
+ Sweden,2020,7.314341068,10.83790398,0.93558234,72.80000305,0.951181591,0.09081845,0.203440145,0.766376078,0.22193329,Europe
172
+ Switzerland,2020,7.508435249,11.08089256,0.946316481,74.69999695,0.917343259,-0.063502058,0.280367136,0.768704712,0.19322899,Europe
173
+ Syria,2015,3.46191287,8.441536903,0.463912874,55.20000076,0.448270857,0.044834916,0.685236931,0.369439602,0.642588735,Asia
174
+ Taiwan,2020,6.751067638,,0.900832534,,0.798834741,,0.710567415,0.84539336,0.082736954,Asia
175
+ Tajikistan,2020,5.373398781,8.080356598,0.789744556,64.69999695,,-0.040467065,0.549786448,0.748897612,0.344161272,Asia
176
+ Tanzania,2020,3.785684109,7.881270409,0.739817083,58.5,0.830343485,0.295271993,0.520631671,0.685533106,0.271117926,Africa
177
+ Thailand,2020,5.884544373,9.76924324,0.866702616,67.59999847,0.840463281,0.273055583,0.918340027,0.783269882,0.326168567,Asia
178
+ Togo,2019,4.179493904,7.375211239,0.53870219,55.09999847,0.617419779,0.064774826,0.736675024,0.590229273,0.443869889,Africa
179
+ Tonga,,,,,,,,,,,Oceania
180
+ Trinidad and Tobago,2017,6.191859722,10.18292046,0.916029036,63.5,0.859140456,0.014855396,0.911336362,0.846467078,0.248098806,North America
181
+ Tunisia,2020,4.730811119,9.230624199,0.719013214,67.5,0.667758107,-0.201814234,0.877354085,0.584633887,0.438774347,Africa
182
+ Turkey,2020,4.861554146,10.21908379,0.856730223,67.59999847,0.510385871,-0.110888988,0.774417162,0.384292454,0.440387309,Asia
183
+ Turkmenistan,2019,5.474299908,9.651184082,0.981501758,62.59999847,0.891526878,0.284880638,,0.509914517,0.183343247,Asia
184
+ Tuvalu,,,,,,,,,,,Oceania
185
+ Uganda,2020,4.640909672,7.68445015,0.800461173,56.5,0.687482119,0.147117555,0.877587259,0.69894886,0.424706668,Africa
186
+ Ukraine,2020,5.269675732,9.427873611,0.884686291,65.19999695,0.784273446,0.126344204,0.945668995,0.687720656,0.284736186,Europe
187
+ United Arab Emirates,2020,6.458392143,11.05288982,0.826755583,67.5,0.9421615,0.060019661,,0.75165993,0.298480302,Asia
188
+ United Kingdom,2020,6.798177242,10.62581062,0.929353237,72.69999695,0.884624004,0.20250842,0.490203947,0.758163571,0.224655122,Europe
189
+ United States,2020,7.028088093,11.00065613,0.937369823,68.09999847,0.850447297,0.034103353,0.678124607,0.787371993,0.295499027,North America
190
+ Uruguay,2020,6.309681416,9.937191963,0.921070337,69.19999695,0.907761931,-0.083986901,0.491007835,0.807350934,0.264692068,South America
191
+ Uzbekistan,2019,6.154049397,8.853480339,0.915275931,65.40000153,0.970294535,0.304297596,0.511196852,0.844808519,0.219745517,Asia
192
+ Vanuatu,,,,,,,,,,,Oceania
193
+ Vatican City,,,,,,,,,,,Europe
194
+ Venezuela,2020,4.573829651,,0.80522424,66.90000153,0.611814618,,0.811319113,0.722391427,0.396250457,South America
195
+ Vietnam,2019,5.467451096,8.992330551,0.847592115,68.09999847,0.95246917,-0.125530764,0.787889242,0.751159906,0.18561019,Asia
196
+ Yemen,2019,4.196912766,,0.870042801,57.5,0.651308239,,0.798228264,0.54280591,0.213043228,Asia
197
+ Zambia,2020,4.837992191,8.116580009,0.766871631,56.29999924,0.750422418,0.056029193,0.809749782,0.691082239,0.344525933,Africa
198
+ Zimbabwe,2020,3.159802198,7.828756809,0.717242658,56.79999924,0.643302977,-0.008695764,0.78852278,0.702572763,0.345736384,Africa
Assets/Countries/countries.csv ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Country,Continent
2
+ Algeria,Africa
3
+ Angola,Africa
4
+ Benin,Africa
5
+ Botswana,Africa
6
+ Burkina,Africa
7
+ Burundi,Africa
8
+ Cameroon,Africa
9
+ Cape Verde,Africa
10
+ Central African Republic,Africa
11
+ Chad,Africa
12
+ Comoros,Africa
13
+ Congo,Africa
14
+ "Congo, Democratic Republic of",Africa
15
+ Djibouti,Africa
16
+ Egypt,Africa
17
+ Equatorial Guinea,Africa
18
+ Eritrea,Africa
19
+ Ethiopia,Africa
20
+ Gabon,Africa
21
+ Gambia,Africa
22
+ Ghana,Africa
23
+ Guinea,Africa
24
+ Guinea-Bissau,Africa
25
+ Ivory Coast,Africa
26
+ Kenya,Africa
27
+ Lesotho,Africa
28
+ Liberia,Africa
29
+ Libya,Africa
30
+ Madagascar,Africa
31
+ Malawi,Africa
32
+ Mali,Africa
33
+ Mauritania,Africa
34
+ Mauritius,Africa
35
+ Morocco,Africa
36
+ Mozambique,Africa
37
+ Namibia,Africa
38
+ Niger,Africa
39
+ Nigeria,Africa
40
+ Rwanda,Africa
41
+ Sao Tome and Principe,Africa
42
+ Senegal,Africa
43
+ Seychelles,Africa
44
+ Sierra Leone,Africa
45
+ Somalia,Africa
46
+ South Africa,Africa
47
+ South Sudan,Africa
48
+ Sudan,Africa
49
+ Swaziland,Africa
50
+ Tanzania,Africa
51
+ Togo,Africa
52
+ Tunisia,Africa
53
+ Uganda,Africa
54
+ Zambia,Africa
55
+ Zimbabwe,Africa
56
+ Afghanistan,Asia
57
+ Bahrain,Asia
58
+ Bangladesh,Asia
59
+ Bhutan,Asia
60
+ Brunei,Asia
61
+ Burma (Myanmar),Asia
62
+ Cambodia,Asia
63
+ China,Asia
64
+ East Timor,Asia
65
+ India,Asia
66
+ Indonesia,Asia
67
+ Iran,Asia
68
+ Iraq,Asia
69
+ Israel,Asia
70
+ Japan,Asia
71
+ Jordan,Asia
72
+ Kazakhstan,Asia
73
+ "Korea, North",Asia
74
+ "Korea, South",Asia
75
+ Kuwait,Asia
76
+ Kyrgyzstan,Asia
77
+ Laos,Asia
78
+ Lebanon,Asia
79
+ Malaysia,Asia
80
+ Maldives,Asia
81
+ Mongolia,Asia
82
+ Nepal,Asia
83
+ Oman,Asia
84
+ Pakistan,Asia
85
+ Philippines,Asia
86
+ Qatar,Asia
87
+ Russian Federation,Asia
88
+ Saudi Arabia,Asia
89
+ Singapore,Asia
90
+ Sri Lanka,Asia
91
+ Syria,Asia
92
+ Tajikistan,Asia
93
+ Thailand,Asia
94
+ Turkey,Asia
95
+ Turkmenistan,Asia
96
+ United Arab Emirates,Asia
97
+ Uzbekistan,Asia
98
+ Vietnam,Asia
99
+ Yemen,Asia
100
+ Albania,Europe
101
+ Andorra,Europe
102
+ Armenia,Europe
103
+ Austria,Europe
104
+ Azerbaijan,Europe
105
+ Belarus,Europe
106
+ Belgium,Europe
107
+ Bosnia and Herzegovina,Europe
108
+ Bulgaria,Europe
109
+ Croatia,Europe
110
+ Cyprus,Europe
111
+ CZ,Europe
112
+ Denmark,Europe
113
+ Estonia,Europe
114
+ Finland,Europe
115
+ France,Europe
116
+ Georgia,Europe
117
+ Germany,Europe
118
+ Greece,Europe
119
+ Hungary,Europe
120
+ Iceland,Europe
121
+ Ireland,Europe
122
+ Italy,Europe
123
+ Latvia,Europe
124
+ Liechtenstein,Europe
125
+ Lithuania,Europe
126
+ Luxembourg,Europe
127
+ Macedonia,Europe
128
+ Malta,Europe
129
+ Moldova,Europe
130
+ Monaco,Europe
131
+ Montenegro,Europe
132
+ Netherlands,Europe
133
+ Norway,Europe
134
+ Poland,Europe
135
+ Portugal,Europe
136
+ Romania,Europe
137
+ San Marino,Europe
138
+ Serbia,Europe
139
+ Slovakia,Europe
140
+ Slovenia,Europe
141
+ Spain,Europe
142
+ Sweden,Europe
143
+ Switzerland,Europe
144
+ Ukraine,Europe
145
+ United Kingdom,Europe
146
+ Vatican City,Europe
147
+ Antigua and Barbuda,North America
148
+ Bahamas,North America
149
+ Barbados,North America
150
+ Belize,North America
151
+ Canada,North America
152
+ Costa Rica,North America
153
+ Cuba,North America
154
+ Dominica,North America
155
+ Dominican Republic,North America
156
+ El Salvador,North America
157
+ Grenada,North America
158
+ Guatemala,North America
159
+ Haiti,North America
160
+ Honduras,North America
161
+ Jamaica,North America
162
+ Mexico,North America
163
+ Nicaragua,North America
164
+ Panama,North America
165
+ Saint Kitts and Nevis,North America
166
+ Saint Lucia,North America
167
+ Saint Vincent and the Grenadines,North America
168
+ Trinidad and Tobago,North America
169
+ US,North America
170
+ Australia,Oceania
171
+ Fiji,Oceania
172
+ Kiribati,Oceania
173
+ Marshall Islands,Oceania
174
+ Micronesia,Oceania
175
+ Nauru,Oceania
176
+ New Zealand,Oceania
177
+ Palau,Oceania
178
+ Papua New Guinea,Oceania
179
+ Samoa,Oceania
180
+ Solomon Islands,Oceania
181
+ Tonga,Oceania
182
+ Tuvalu,Oceania
183
+ Vanuatu,Oceania
184
+ Argentina,South America
185
+ Bolivia,South America
186
+ Brazil,South America
187
+ Chile,South America
188
+ Colombia,South America
189
+ Ecuador,South America
190
+ Guyana,South America
191
+ Paraguay,South America
192
+ Peru,South America
193
+ Suriname,South America
194
+ Uruguay,South America
195
+ Venezuela,South America
Assets/IMC Expansion/US Protected Classes from IMC.csv ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Protected Category,Source,Words,Definition,Category
2
+ Age,IMC,aged,,Elderly
3
+ Age,IMC,young,,Youth
4
+ Age,IMC,old,,Elderly
5
+ Age,IMC,mature,,Elderly
6
+ Age,IMC,immature,,Youth
7
+ Age,IMC,child,,Youth
8
+ Age,IMC,juvenile,,Youth
9
+ Age,Addition,adolescent,,Youth
10
+ Age,Addition,kid,,Youth
11
+ Disability,IMC,blind,,Specific
12
+ Disability,IMC,deaf,,Specific
13
+ Disability,IMC,mobility,,General
14
+ Disability,IMC,handicap,,General
15
+ Disability,IMC,abled,,General
16
+ Disability,IMC,disability,,General
17
+ Disability,IMC,disabled,,General
18
+ Gender Identity,IMC,aab,,Assignment
19
+ Gender Identity,IMC,male,,Male
20
+ Gender Identity,IMC,female,,Female
21
+ Gender Identity,IMC,cis,A term used to describe a person whose gender identity aligns with those typically associated with the sex assigned to them at birth.,cis
22
+ Gender Identity,https://www.hrc.org/resources/glossary-of-terms,cisgender,A term used to describe a person whose gender identity aligns with those typically associated with the sex assigned to them at birth.,cis
23
+ Gender Identity,https://www.hrc.org/resources/glossary-of-terms,gender-fluid,A person who does not identify with a single fixed gender or has a fluid or unfixed gender identity.,non-binary
24
+ Gender Identity,https://www.hrc.org/resources/glossary-of-terms,Genderqueer,"Genderqueer people typically reject notions of static categories of gender and embrace a fluidity of gender identity and often, though not always, sexual orientation. People who identify as ""genderqueer"" may see themselves as being both male and female, neither male nor female or as falling completely outside these categories.",non-binary
25
+ Gender Identity,https://www.hrc.org/resources/glossary-of-terms,Intersex,"Intersex people are born with a variety of differences in their sex traits and reproductive anatomy. There is a wide variety of difference among intersex variations, including differences in genitalia, chromosomes, gonads, internal sex organs, hormone production, hormone response, and/or secondary sex traits.",Assignment
26
+ Gender Identity,https://www.hrc.org/resources/glossary-of-terms,Sex assigned at birth,"The sex, male, female or intersex, that a doctor or midwife uses to describe a child at birth based on their external anatomy.",Assignment
27
+ Gender Identity,IMC,many-genders,,many-genders
28
+ Gender Identity,IMC,no-gender,,no-gender
29
+ Gender Identity,IMC,non-binary,An adjective describing a person who does not identify exclusively as a man or a woman.,non-binary
30
+ Gender Identity,IMC,trans-woman,,trans
31
+ Gender Identity,IMC,trans-man,,trans
32
+ Gender Identity,IMC,trans-gender,,trans
33
+ Gender Identity,IMC,afab,,Assignment
34
+ Gender Identity,IMC,amab,,Assignment
35
+ Gender Identity,https://techland.time.com/2014/02/14/a-comprehensive-guide-to-facebooks-new-options-for-gender-identity/,bigender,A person identifies with two distinct genders.,bi-gender
36
+ Gender Identity,https://techland.time.com/2014/02/14/a-comprehensive-guide-to-facebooks-new-options-for-gender-identity/,cis female,,cis
37
+ Gender Identity,https://techland.time.com/2014/02/14/a-comprehensive-guide-to-facebooks-new-options-for-gender-identity/,cis male,,cis
38
+ Gender Identity,https://techland.time.com/2014/02/14/a-comprehensive-guide-to-facebooks-new-options-for-gender-identity/,cis man,,cis
39
+ Gender Identity,https://techland.time.com/2014/02/14/a-comprehensive-guide-to-facebooks-new-options-for-gender-identity/,cis woman,,cis
40
+ Gender Identity,https://techland.time.com/2014/02/14/a-comprehensive-guide-to-facebooks-new-options-for-gender-identity/,female to male,,trans
41
+ Gender Identity,https://techland.time.com/2014/02/14/a-comprehensive-guide-to-facebooks-new-options-for-gender-identity/,male to female,,trans
42
+ Gender Identity,https://techland.time.com/2014/02/14/a-comprehensive-guide-to-facebooks-new-options-for-gender-identity/,FTM,,trans
43
+ Gender Identity,https://techland.time.com/2014/02/14/a-comprehensive-guide-to-facebooks-new-options-for-gender-identity/,MTF,,trans
44
+ Gender Identity,https://techland.time.com/2014/02/14/a-comprehensive-guide-to-facebooks-new-options-for-gender-identity/,neutrois,"Killermann says this French-esque term, pronounced noo-TWA, is relatively new. It’s used by people who see themselves as gender neutral, people who don’t feel any gender is a big component of their identity.",no-gender
45
+ Gender Identity,https://techland.time.com/2014/02/14/a-comprehensive-guide-to-facebooks-new-options-for-gender-identity/,agender,One sense of prefix a- is “without.”,no-gender
46
+ Gender Identity,https://techland.time.com/2014/02/14/a-comprehensive-guide-to-facebooks-new-options-for-gender-identity/,pangender,"A pangender Facebook user probably sees themselves as “a little bit of everything in the sexual catalog,” Killermann says.",many-genders
47
+ Gender Identity,https://techland.time.com/2014/02/14/a-comprehensive-guide-to-facebooks-new-options-for-gender-identity/,two-spirit,"This term, Killermann says, comes from Native American culture, describing someone who embodies both the spirits of a man and a woman.",bi-gender
48
+ Gender Identity,https://techland.time.com/2014/02/14/a-comprehensive-guide-to-facebooks-new-options-for-gender-identity/,two spirit,"This term, Killermann says, comes from Native American culture, describing someone who embodies both the spirits of a man and a woman.",bi-gender
49
+ Gender Identity,https://techland.time.com/2014/02/14/a-comprehensive-guide-to-facebooks-new-options-for-gender-identity/,androgynous,,
50
+ Gender Identity,https://techland.time.com/2014/02/14/a-comprehensive-guide-to-facebooks-new-options-for-gender-identity/,androgyne,,
51
+ Gender Identity,https://techland.time.com/2014/02/14/a-comprehensive-guide-to-facebooks-new-options-for-gender-identity/,trans,A trans person does not identify with the gender that corresponds to the sex they were assigned at birth.,trans
52
+ Gender Identity,https://techland.time.com/2014/02/14/a-comprehensive-guide-to-facebooks-new-options-for-gender-identity/,trans woman,A person who was assigned the male sex at birth but identifies as female.,trans
53
+ Gender Identity,https://techland.time.com/2014/02/14/a-comprehensive-guide-to-facebooks-new-options-for-gender-identity/,trans man,,trans
54
+ Sex,IMC,maternity,,Pregnancy
55
+ Sex,IMC,motherhood,,Pregnancy
56
+ Sex,IMC,motherhood,,Pregnancy
57
+ Sex,IMC,parental,,Pregnancy
58
+ Sex,IMC,pregnant,,Pregnancy
59
+ Race,IMC,black,,Black
60
+ Race,IMC,indian,,Native American
61
+ Race,IMC,indigenous,,Native American
62
+ Race,IMC,asian,,AAPI
63
+ Race,IMC,hispanic,,Hispanic
64
+ Race,IMC,islander,,AAPI
65
+ Race,IMC,white,,White
66
+ Race,IMC,european,,White
67
+ Race,IMC,african,,Black
68
+ Sexual Orientation,IMC,asexual,"Often called “ace” for short, asexual refers to a complete or partial lack of sexual attraction or lack of interest in sexual activity with others. Asexuality exists on a spectrum, and asexual people may experience no, little or conditional sexual attraction.",asexual
69
+ Sexual Orientation,IMC,homosexual,,homosexual
70
+ Sexual Orientation,IMC,heterosexual,,heterosexual
71
+ Sexual Orientation,IMC,bisexual,"A person emotionally, romantically or sexually attracted to more than one sex, gender or gender identity though not necessarily simultaneously, in the same way or to the same degree. Sometimes used interchangeably with pansexual.",bisexual
72
+ Sexual Orientation,IMC,pansexual,,pansexual
73
+ Sexual Orientation,https://www.hrc.org/resources/glossary-of-terms,gay,"A person who is emotionally, romantically or sexually attracted to members of the same gender. Men, women and non-binary people may use this term to describe themselves.",homosexual
74
+ Veteran,IMC,Veteran,,
Assets/Professions/.ipynb_checkpoints/Standard_Occupational_Classifications_Orgin-checkpoint.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
1
+ # Where did this data come from?
2
+
3
+ While looking for a solid list of job titles, I determined that the US Bureau of Labor Statistics would provide an excellent starting point for a comprehensive listing. This data can be found at [Standard Occupational Classifications in 2018](https://www.bls.gov/soc/2018/home.htm). Specifically, I used their [Direct Match Title File](https://www.bls.gov/soc/2018/home.htm#match), because it seemed to have the most comprehensive list and includes the SOC categories.
4
+
5
+ Here's the Header from the file:
6
+ > U.S. Bureau of Labor Statistics
7
+ > On behalf of the Office of Management and Budget (OMB) and the Standard Occupational Classification Policy Committee (SOCPC)
8
+ > November 2017 (Updated April 15, 2020)
9
+ > ***Questions should be emailed to soc@bls.gov***
Assets/Professions/.ipynb_checkpoints/clean-SOC-2018-checkpoint.ipynb ADDED
@@ -0,0 +1,558 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "08cf1c6f-0895-4e7b-9279-109c55dd6596",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import pandas as pd, spacy, nltk, numpy as np, re, ssl"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 52,
16
+ "id": "e3a83c6d-bfb4-4aa2-a9dd-a4fd7ffe6d03",
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "df = pd.read_csv(\"soc_2018_direct_match_title_file.csv\")"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": 53,
26
+ "id": "afa91f8f-d7f6-47a0-adc3-b21866acc2fa",
27
+ "metadata": {},
28
+ "outputs": [
29
+ {
30
+ "data": {
31
+ "text/html": [
32
+ "<div>\n",
33
+ "<style scoped>\n",
34
+ " .dataframe tbody tr th:only-of-type {\n",
35
+ " vertical-align: middle;\n",
36
+ " }\n",
37
+ "\n",
38
+ " .dataframe tbody tr th {\n",
39
+ " vertical-align: top;\n",
40
+ " }\n",
41
+ "\n",
42
+ " .dataframe thead th {\n",
43
+ " text-align: right;\n",
44
+ " }\n",
45
+ "</style>\n",
46
+ "<table border=\"1\" class=\"dataframe\">\n",
47
+ " <thead>\n",
48
+ " <tr style=\"text-align: right;\">\n",
49
+ " <th></th>\n",
50
+ " <th>2018 SOC Code</th>\n",
51
+ " <th>2018 SOC Title</th>\n",
52
+ " <th>2018 SOC Direct Match Title</th>\n",
53
+ " <th>Illustrative Example</th>\n",
54
+ " </tr>\n",
55
+ " </thead>\n",
56
+ " <tbody>\n",
57
+ " <tr>\n",
58
+ " <th>0</th>\n",
59
+ " <td>11-1011</td>\n",
60
+ " <td>Chief Executives</td>\n",
61
+ " <td>Admiral</td>\n",
62
+ " <td>x</td>\n",
63
+ " </tr>\n",
64
+ " <tr>\n",
65
+ " <th>1</th>\n",
66
+ " <td>11-1011</td>\n",
67
+ " <td>Chief Executives</td>\n",
68
+ " <td>CEO</td>\n",
69
+ " <td>NaN</td>\n",
70
+ " </tr>\n",
71
+ " <tr>\n",
72
+ " <th>2</th>\n",
73
+ " <td>11-1011</td>\n",
74
+ " <td>Chief Executives</td>\n",
75
+ " <td>Chief Executive Officer</td>\n",
76
+ " <td>NaN</td>\n",
77
+ " </tr>\n",
78
+ " <tr>\n",
79
+ " <th>3</th>\n",
80
+ " <td>11-1011</td>\n",
81
+ " <td>Chief Executives</td>\n",
82
+ " <td>Chief Financial Officer</td>\n",
83
+ " <td>x</td>\n",
84
+ " </tr>\n",
85
+ " <tr>\n",
86
+ " <th>4</th>\n",
87
+ " <td>11-1011</td>\n",
88
+ " <td>Chief Executives</td>\n",
89
+ " <td>Chief Operating Officer</td>\n",
90
+ " <td>x</td>\n",
91
+ " </tr>\n",
92
+ " </tbody>\n",
93
+ "</table>\n",
94
+ "</div>"
95
+ ],
96
+ "text/plain": [
97
+ " 2018 SOC Code 2018 SOC Title 2018 SOC Direct Match Title \\\n",
98
+ "0 11-1011 Chief Executives Admiral \n",
99
+ "1 11-1011 Chief Executives CEO \n",
100
+ "2 11-1011 Chief Executives Chief Executive Officer \n",
101
+ "3 11-1011 Chief Executives Chief Financial Officer \n",
102
+ "4 11-1011 Chief Executives Chief Operating Officer \n",
103
+ "\n",
104
+ " Illustrative Example \n",
105
+ "0 x \n",
106
+ "1 NaN \n",
107
+ "2 NaN \n",
108
+ "3 x \n",
109
+ "4 x "
110
+ ]
111
+ },
112
+ "execution_count": 53,
113
+ "metadata": {},
114
+ "output_type": "execute_result"
115
+ }
116
+ ],
117
+ "source": [
118
+ "df.head()"
119
+ ]
120
+ },
121
+ {
122
+ "cell_type": "code",
123
+ "execution_count": 54,
124
+ "id": "c2cc8198-f1ba-4318-b4f0-ae2d525290ff",
125
+ "metadata": {},
126
+ "outputs": [],
127
+ "source": [
128
+ "df = df.drop(\"Illustrative Example\", axis=1)"
129
+ ]
130
+ },
131
+ {
132
+ "cell_type": "code",
133
+ "execution_count": 55,
134
+ "id": "020c3356-8263-47af-b6e3-bf6d27bfee78",
135
+ "metadata": {},
136
+ "outputs": [
137
+ {
138
+ "data": {
139
+ "text/html": [
140
+ "<div>\n",
141
+ "<style scoped>\n",
142
+ " .dataframe tbody tr th:only-of-type {\n",
143
+ " vertical-align: middle;\n",
144
+ " }\n",
145
+ "\n",
146
+ " .dataframe tbody tr th {\n",
147
+ " vertical-align: top;\n",
148
+ " }\n",
149
+ "\n",
150
+ " .dataframe thead th {\n",
151
+ " text-align: right;\n",
152
+ " }\n",
153
+ "</style>\n",
154
+ "<table border=\"1\" class=\"dataframe\">\n",
155
+ " <thead>\n",
156
+ " <tr style=\"text-align: right;\">\n",
157
+ " <th></th>\n",
158
+ " <th>2018 SOC Code</th>\n",
159
+ " <th>2018 SOC Title</th>\n",
160
+ " <th>2018 SOC Direct Match Title</th>\n",
161
+ " </tr>\n",
162
+ " </thead>\n",
163
+ " <tbody>\n",
164
+ " <tr>\n",
165
+ " <th>0</th>\n",
166
+ " <td>11-1011</td>\n",
167
+ " <td>Chief Executives</td>\n",
168
+ " <td>Admiral</td>\n",
169
+ " </tr>\n",
170
+ " <tr>\n",
171
+ " <th>1</th>\n",
172
+ " <td>11-1011</td>\n",
173
+ " <td>Chief Executives</td>\n",
174
+ " <td>CEO</td>\n",
175
+ " </tr>\n",
176
+ " <tr>\n",
177
+ " <th>2</th>\n",
178
+ " <td>11-1011</td>\n",
179
+ " <td>Chief Executives</td>\n",
180
+ " <td>Chief Executive Officer</td>\n",
181
+ " </tr>\n",
182
+ " <tr>\n",
183
+ " <th>3</th>\n",
184
+ " <td>11-1011</td>\n",
185
+ " <td>Chief Executives</td>\n",
186
+ " <td>Chief Financial Officer</td>\n",
187
+ " </tr>\n",
188
+ " <tr>\n",
189
+ " <th>4</th>\n",
190
+ " <td>11-1011</td>\n",
191
+ " <td>Chief Executives</td>\n",
192
+ " <td>Chief Operating Officer</td>\n",
193
+ " </tr>\n",
194
+ " </tbody>\n",
195
+ "</table>\n",
196
+ "</div>"
197
+ ],
198
+ "text/plain": [
199
+ " 2018 SOC Code 2018 SOC Title 2018 SOC Direct Match Title\n",
200
+ "0 11-1011 Chief Executives Admiral\n",
201
+ "1 11-1011 Chief Executives CEO\n",
202
+ "2 11-1011 Chief Executives Chief Executive Officer\n",
203
+ "3 11-1011 Chief Executives Chief Financial Officer\n",
204
+ "4 11-1011 Chief Executives Chief Operating Officer"
205
+ ]
206
+ },
207
+ "execution_count": 55,
208
+ "metadata": {},
209
+ "output_type": "execute_result"
210
+ }
211
+ ],
212
+ "source": [
213
+ "df.head()"
214
+ ]
215
+ },
216
+ {
217
+ "cell_type": "code",
218
+ "execution_count": 56,
219
+ "id": "538a8047-9de8-4d29-961c-6b008c298e67",
220
+ "metadata": {},
221
+ "outputs": [],
222
+ "source": [
223
+ "df[\"Major\"] = df[\"2018 SOC Code\"].apply(lambda x: x[:2]).apply(int)"
224
+ ]
225
+ },
226
+ {
227
+ "cell_type": "code",
228
+ "execution_count": 57,
229
+ "id": "5969d5bc-69a5-42f6-a774-73a28e85b019",
230
+ "metadata": {},
231
+ "outputs": [],
232
+ "source": [
233
+ "# https://www.bls.gov/soc/2018/soc_2018_class_and_coding_structure.pdf determines the categorization.\n",
234
+ "def high_level_agg(number):\n",
235
+ " if 11 <= number <= 29:\n",
236
+ " category = \"Management, Business, Science, and Arts Occupations\"\n",
237
+ " elif 31 <= number <= 39:\n",
238
+ " category = \"Service Occupations\"\n",
239
+ " elif 41 <= number <= 43:\n",
240
+ " category = \"Sales and Office Occupations\"\n",
241
+ " elif 45 <= number <= 49:\n",
242
+ " category = \"Natural Resources, Construction, and Maintenance Occupations\"\n",
243
+ " elif 51 <= number <= 53:\n",
244
+ " category = \"Production, Transportation, and Material Moving Occupations\"\n",
245
+ " else:\n",
246
+ " category = \"Military Specific Occupations\"\n",
247
+ " return category"
248
+ ]
249
+ },
250
+ {
251
+ "cell_type": "code",
252
+ "execution_count": 58,
253
+ "id": "ebd35a6d-e0cd-497f-9c0b-9acf24de25dc",
254
+ "metadata": {},
255
+ "outputs": [
256
+ {
257
+ "data": {
258
+ "text/plain": [
259
+ "array([11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43,\n",
260
+ " 45, 47, 49, 51, 53, 55])"
261
+ ]
262
+ },
263
+ "execution_count": 58,
264
+ "metadata": {},
265
+ "output_type": "execute_result"
266
+ }
267
+ ],
268
+ "source": [
269
+ "df.Major.unique()"
270
+ ]
271
+ },
272
+ {
273
+ "cell_type": "code",
274
+ "execution_count": 59,
275
+ "id": "729a6707-e442-4ad4-ad50-c6f701e00757",
276
+ "metadata": {},
277
+ "outputs": [],
278
+ "source": [
279
+ "df[\"high_level\"] = df.Major.apply(high_level_agg)"
280
+ ]
281
+ },
282
+ {
283
+ "cell_type": "code",
284
+ "execution_count": 60,
285
+ "id": "8017e2e0-5635-47fc-bef6-be13e6988177",
286
+ "metadata": {},
287
+ "outputs": [
288
+ {
289
+ "data": {
290
+ "text/html": [
291
+ "<div>\n",
292
+ "<style scoped>\n",
293
+ " .dataframe tbody tr th:only-of-type {\n",
294
+ " vertical-align: middle;\n",
295
+ " }\n",
296
+ "\n",
297
+ " .dataframe tbody tr th {\n",
298
+ " vertical-align: top;\n",
299
+ " }\n",
300
+ "\n",
301
+ " .dataframe thead th {\n",
302
+ " text-align: right;\n",
303
+ " }\n",
304
+ "</style>\n",
305
+ "<table border=\"1\" class=\"dataframe\">\n",
306
+ " <thead>\n",
307
+ " <tr style=\"text-align: right;\">\n",
308
+ " <th></th>\n",
309
+ " <th>2018 SOC Code</th>\n",
310
+ " <th>2018 SOC Title</th>\n",
311
+ " <th>2018 SOC Direct Match Title</th>\n",
312
+ " <th>Major</th>\n",
313
+ " <th>high_level</th>\n",
314
+ " </tr>\n",
315
+ " </thead>\n",
316
+ " <tbody>\n",
317
+ " <tr>\n",
318
+ " <th>0</th>\n",
319
+ " <td>11-1011</td>\n",
320
+ " <td>Chief Executives</td>\n",
321
+ " <td>Admiral</td>\n",
322
+ " <td>11</td>\n",
323
+ " <td>Management, Business, Science, and Arts Occupa...</td>\n",
324
+ " </tr>\n",
325
+ " <tr>\n",
326
+ " <th>1</th>\n",
327
+ " <td>11-1011</td>\n",
328
+ " <td>Chief Executives</td>\n",
329
+ " <td>CEO</td>\n",
330
+ " <td>11</td>\n",
331
+ " <td>Management, Business, Science, and Arts Occupa...</td>\n",
332
+ " </tr>\n",
333
+ " <tr>\n",
334
+ " <th>2</th>\n",
335
+ " <td>11-1011</td>\n",
336
+ " <td>Chief Executives</td>\n",
337
+ " <td>Chief Executive Officer</td>\n",
338
+ " <td>11</td>\n",
339
+ " <td>Management, Business, Science, and Arts Occupa...</td>\n",
340
+ " </tr>\n",
341
+ " <tr>\n",
342
+ " <th>3</th>\n",
343
+ " <td>11-1011</td>\n",
344
+ " <td>Chief Executives</td>\n",
345
+ " <td>Chief Financial Officer</td>\n",
346
+ " <td>11</td>\n",
347
+ " <td>Management, Business, Science, and Arts Occupa...</td>\n",
348
+ " </tr>\n",
349
+ " <tr>\n",
350
+ " <th>4</th>\n",
351
+ " <td>11-1011</td>\n",
352
+ " <td>Chief Executives</td>\n",
353
+ " <td>Chief Operating Officer</td>\n",
354
+ " <td>11</td>\n",
355
+ " <td>Management, Business, Science, and Arts Occupa...</td>\n",
356
+ " </tr>\n",
357
+ " </tbody>\n",
358
+ "</table>\n",
359
+ "</div>"
360
+ ],
361
+ "text/plain": [
362
+ " 2018 SOC Code 2018 SOC Title 2018 SOC Direct Match Title Major \\\n",
363
+ "0 11-1011 Chief Executives Admiral 11 \n",
364
+ "1 11-1011 Chief Executives CEO 11 \n",
365
+ "2 11-1011 Chief Executives Chief Executive Officer 11 \n",
366
+ "3 11-1011 Chief Executives Chief Financial Officer 11 \n",
367
+ "4 11-1011 Chief Executives Chief Operating Officer 11 \n",
368
+ "\n",
369
+ " high_level \n",
370
+ "0 Management, Business, Science, and Arts Occupa... \n",
371
+ "1 Management, Business, Science, and Arts Occupa... \n",
372
+ "2 Management, Business, Science, and Arts Occupa... \n",
373
+ "3 Management, Business, Science, and Arts Occupa... \n",
374
+ "4 Management, Business, Science, and Arts Occupa... "
375
+ ]
376
+ },
377
+ "execution_count": 60,
378
+ "metadata": {},
379
+ "output_type": "execute_result"
380
+ }
381
+ ],
382
+ "source": [
383
+ "df.head()"
384
+ ]
385
+ },
386
+ {
387
+ "cell_type": "code",
388
+ "execution_count": 61,
389
+ "id": "885a1379-3795-4e52-a6a6-b1f03476101e",
390
+ "metadata": {},
391
+ "outputs": [],
392
+ "source": [
393
+ "names = {\"2018 SOC Code\":\"SOC_code\", \"2018 SOC Title\": \"Category\", \"2018 SOC Direct Match Title\":\"Words\"}"
394
+ ]
395
+ },
396
+ {
397
+ "cell_type": "code",
398
+ "execution_count": 62,
399
+ "id": "b77202c7-8e4a-4bed-bc89-e7f146e857ba",
400
+ "metadata": {},
401
+ "outputs": [],
402
+ "source": [
403
+ "df = df.rename(columns=names)"
404
+ ]
405
+ },
406
+ {
407
+ "cell_type": "code",
408
+ "execution_count": 63,
409
+ "id": "7035d6dc-0638-4069-8a17-074b7bab5366",
410
+ "metadata": {},
411
+ "outputs": [
412
+ {
413
+ "data": {
414
+ "text/html": [
415
+ "<div>\n",
416
+ "<style scoped>\n",
417
+ " .dataframe tbody tr th:only-of-type {\n",
418
+ " vertical-align: middle;\n",
419
+ " }\n",
420
+ "\n",
421
+ " .dataframe tbody tr th {\n",
422
+ " vertical-align: top;\n",
423
+ " }\n",
424
+ "\n",
425
+ " .dataframe thead th {\n",
426
+ " text-align: right;\n",
427
+ " }\n",
428
+ "</style>\n",
429
+ "<table border=\"1\" class=\"dataframe\">\n",
430
+ " <thead>\n",
431
+ " <tr style=\"text-align: right;\">\n",
432
+ " <th></th>\n",
433
+ " <th>SOC_code</th>\n",
434
+ " <th>Category</th>\n",
435
+ " <th>Words</th>\n",
436
+ " <th>Major</th>\n",
437
+ " <th>high_level</th>\n",
438
+ " </tr>\n",
439
+ " </thead>\n",
440
+ " <tbody>\n",
441
+ " <tr>\n",
442
+ " <th>0</th>\n",
443
+ " <td>11-1011</td>\n",
444
+ " <td>Chief Executives</td>\n",
445
+ " <td>Admiral</td>\n",
446
+ " <td>11</td>\n",
447
+ " <td>Management, Business, Science, and Arts Occupa...</td>\n",
448
+ " </tr>\n",
449
+ " <tr>\n",
450
+ " <th>1</th>\n",
451
+ " <td>11-1011</td>\n",
452
+ " <td>Chief Executives</td>\n",
453
+ " <td>CEO</td>\n",
454
+ " <td>11</td>\n",
455
+ " <td>Management, Business, Science, and Arts Occupa...</td>\n",
456
+ " </tr>\n",
457
+ " <tr>\n",
458
+ " <th>2</th>\n",
459
+ " <td>11-1011</td>\n",
460
+ " <td>Chief Executives</td>\n",
461
+ " <td>Chief Executive Officer</td>\n",
462
+ " <td>11</td>\n",
463
+ " <td>Management, Business, Science, and Arts Occupa...</td>\n",
464
+ " </tr>\n",
465
+ " <tr>\n",
466
+ " <th>3</th>\n",
467
+ " <td>11-1011</td>\n",
468
+ " <td>Chief Executives</td>\n",
469
+ " <td>Chief Financial Officer</td>\n",
470
+ " <td>11</td>\n",
471
+ " <td>Management, Business, Science, and Arts Occupa...</td>\n",
472
+ " </tr>\n",
473
+ " <tr>\n",
474
+ " <th>4</th>\n",
475
+ " <td>11-1011</td>\n",
476
+ " <td>Chief Executives</td>\n",
477
+ " <td>Chief Operating Officer</td>\n",
478
+ " <td>11</td>\n",
479
+ " <td>Management, Business, Science, and Arts Occupa...</td>\n",
480
+ " </tr>\n",
481
+ " </tbody>\n",
482
+ "</table>\n",
483
+ "</div>"
484
+ ],
485
+ "text/plain": [
486
+ " SOC_code Category Words Major \\\n",
487
+ "0 11-1011 Chief Executives Admiral 11 \n",
488
+ "1 11-1011 Chief Executives CEO 11 \n",
489
+ "2 11-1011 Chief Executives Chief Executive Officer 11 \n",
490
+ "3 11-1011 Chief Executives Chief Financial Officer 11 \n",
491
+ "4 11-1011 Chief Executives Chief Operating Officer 11 \n",
492
+ "\n",
493
+ " high_level \n",
494
+ "0 Management, Business, Science, and Arts Occupa... \n",
495
+ "1 Management, Business, Science, and Arts Occupa... \n",
496
+ "2 Management, Business, Science, and Arts Occupa... \n",
497
+ "3 Management, Business, Science, and Arts Occupa... \n",
498
+ "4 Management, Business, Science, and Arts Occupa... "
499
+ ]
500
+ },
501
+ "execution_count": 63,
502
+ "metadata": {},
503
+ "output_type": "execute_result"
504
+ }
505
+ ],
506
+ "source": [
507
+ "df.head()"
508
+ ]
509
+ },
510
+ {
511
+ "cell_type": "code",
512
+ "execution_count": 64,
513
+ "id": "3f8c4a84-a50e-4dfe-9448-ac69c00750f4",
514
+ "metadata": {},
515
+ "outputs": [],
516
+ "source": [
517
+ "df.to_csv(\"soc-professions-2018.csv\")"
518
+ ]
519
+ },
520
+ {
521
+ "cell_type": "code",
522
+ "execution_count": null,
523
+ "id": "753cbdaf-41a5-4665-b13f-145702b293ec",
524
+ "metadata": {},
525
+ "outputs": [],
526
+ "source": []
527
+ },
528
+ {
529
+ "cell_type": "code",
530
+ "execution_count": null,
531
+ "id": "b44845e3-5a9f-4009-894c-a8e7b43b4d1b",
532
+ "metadata": {},
533
+ "outputs": [],
534
+ "source": []
535
+ }
536
+ ],
537
+ "metadata": {
538
+ "kernelspec": {
539
+ "display_name": "Python 3 (ipykernel)",
540
+ "language": "python",
541
+ "name": "python3"
542
+ },
543
+ "language_info": {
544
+ "codemirror_mode": {
545
+ "name": "ipython",
546
+ "version": 3
547
+ },
548
+ "file_extension": ".py",
549
+ "mimetype": "text/x-python",
550
+ "name": "python",
551
+ "nbconvert_exporter": "python",
552
+ "pygments_lexer": "ipython3",
553
+ "version": "3.8.8"
554
+ }
555
+ },
556
+ "nbformat": 4,
557
+ "nbformat_minor": 5
558
+ }
Assets/Professions/.ipynb_checkpoints/soc-professions-2018-checkpoint.csv ADDED
The diff for this file is too large to render. See raw diff
Assets/Professions/.ipynb_checkpoints/soc_2018_direct_match_title_file-checkpoint.csv ADDED
The diff for this file is too large to render. See raw diff
Assets/Professions/Standard_Occupational_Classifications_Orgin.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
1
+ # Where did this data come from?
2
+
3
+ While looking for a reliable list of occupation titles, I determined that the US Bureau of Labor Statistics would provide an excellent starting point for a comprehensive listing. This data can be found at [Standard Occupational Classifications in 2018](https://www.bls.gov/soc/2018/home.htm). Specifically, I used their [Direct Match Title File](https://www.bls.gov/soc/2018/home.htm#match) because it appeared to have the most comprehensive list of titles and provides the SOC category for each one.
4
+
5
+ Here is the header from the Direct Match Title File:
6
+ > U.S. Bureau of Labor Statistics
7
+ > On behalf of the Office of Management and Budget (OMB) and the Standard Occupational Classification Policy Committee (SOCPC)
8
+ > November 2017 (Updated April 15, 2020)
9
+ > ***Questions should be emailed to soc@bls.gov***
Assets/Professions/clean-SOC-2018.ipynb ADDED
@@ -0,0 +1,558 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "08cf1c6f-0895-4e7b-9279-109c55dd6596",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import pandas as pd, spacy, nltk, numpy as np, re, ssl"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 52,
16
+ "id": "e3a83c6d-bfb4-4aa2-a9dd-a4fd7ffe6d03",
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "df = pd.read_csv(\"soc_2018_direct_match_title_file.csv\")"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": 53,
26
+ "id": "afa91f8f-d7f6-47a0-adc3-b21866acc2fa",
27
+ "metadata": {},
28
+ "outputs": [
29
+ {
30
+ "data": {
31
+ "text/html": [
32
+ "<div>\n",
33
+ "<style scoped>\n",
34
+ " .dataframe tbody tr th:only-of-type {\n",
35
+ " vertical-align: middle;\n",
36
+ " }\n",
37
+ "\n",
38
+ " .dataframe tbody tr th {\n",
39
+ " vertical-align: top;\n",
40
+ " }\n",
41
+ "\n",
42
+ " .dataframe thead th {\n",
43
+ " text-align: right;\n",
44
+ " }\n",
45
+ "</style>\n",
46
+ "<table border=\"1\" class=\"dataframe\">\n",
47
+ " <thead>\n",
48
+ " <tr style=\"text-align: right;\">\n",
49
+ " <th></th>\n",
50
+ " <th>2018 SOC Code</th>\n",
51
+ " <th>2018 SOC Title</th>\n",
52
+ " <th>2018 SOC Direct Match Title</th>\n",
53
+ " <th>Illustrative Example</th>\n",
54
+ " </tr>\n",
55
+ " </thead>\n",
56
+ " <tbody>\n",
57
+ " <tr>\n",
58
+ " <th>0</th>\n",
59
+ " <td>11-1011</td>\n",
60
+ " <td>Chief Executives</td>\n",
61
+ " <td>Admiral</td>\n",
62
+ " <td>x</td>\n",
63
+ " </tr>\n",
64
+ " <tr>\n",
65
+ " <th>1</th>\n",
66
+ " <td>11-1011</td>\n",
67
+ " <td>Chief Executives</td>\n",
68
+ " <td>CEO</td>\n",
69
+ " <td>NaN</td>\n",
70
+ " </tr>\n",
71
+ " <tr>\n",
72
+ " <th>2</th>\n",
73
+ " <td>11-1011</td>\n",
74
+ " <td>Chief Executives</td>\n",
75
+ " <td>Chief Executive Officer</td>\n",
76
+ " <td>NaN</td>\n",
77
+ " </tr>\n",
78
+ " <tr>\n",
79
+ " <th>3</th>\n",
80
+ " <td>11-1011</td>\n",
81
+ " <td>Chief Executives</td>\n",
82
+ " <td>Chief Financial Officer</td>\n",
83
+ " <td>x</td>\n",
84
+ " </tr>\n",
85
+ " <tr>\n",
86
+ " <th>4</th>\n",
87
+ " <td>11-1011</td>\n",
88
+ " <td>Chief Executives</td>\n",
89
+ " <td>Chief Operating Officer</td>\n",
90
+ " <td>x</td>\n",
91
+ " </tr>\n",
92
+ " </tbody>\n",
93
+ "</table>\n",
94
+ "</div>"
95
+ ],
96
+ "text/plain": [
97
+ " 2018 SOC Code 2018 SOC Title 2018 SOC Direct Match Title \\\n",
98
+ "0 11-1011 Chief Executives Admiral \n",
99
+ "1 11-1011 Chief Executives CEO \n",
100
+ "2 11-1011 Chief Executives Chief Executive Officer \n",
101
+ "3 11-1011 Chief Executives Chief Financial Officer \n",
102
+ "4 11-1011 Chief Executives Chief Operating Officer \n",
103
+ "\n",
104
+ " Illustrative Example \n",
105
+ "0 x \n",
106
+ "1 NaN \n",
107
+ "2 NaN \n",
108
+ "3 x \n",
109
+ "4 x "
110
+ ]
111
+ },
112
+ "execution_count": 53,
113
+ "metadata": {},
114
+ "output_type": "execute_result"
115
+ }
116
+ ],
117
+ "source": [
118
+ "df.head()"
119
+ ]
120
+ },
121
+ {
122
+ "cell_type": "code",
123
+ "execution_count": 54,
124
+ "id": "c2cc8198-f1ba-4318-b4f0-ae2d525290ff",
125
+ "metadata": {},
126
+ "outputs": [],
127
+ "source": [
128
+ "df = df.drop(\"Illustrative Example\", axis=1)"
129
+ ]
130
+ },
131
+ {
132
+ "cell_type": "code",
133
+ "execution_count": 55,
134
+ "id": "020c3356-8263-47af-b6e3-bf6d27bfee78",
135
+ "metadata": {},
136
+ "outputs": [
137
+ {
138
+ "data": {
139
+ "text/html": [
140
+ "<div>\n",
141
+ "<style scoped>\n",
142
+ " .dataframe tbody tr th:only-of-type {\n",
143
+ " vertical-align: middle;\n",
144
+ " }\n",
145
+ "\n",
146
+ " .dataframe tbody tr th {\n",
147
+ " vertical-align: top;\n",
148
+ " }\n",
149
+ "\n",
150
+ " .dataframe thead th {\n",
151
+ " text-align: right;\n",
152
+ " }\n",
153
+ "</style>\n",
154
+ "<table border=\"1\" class=\"dataframe\">\n",
155
+ " <thead>\n",
156
+ " <tr style=\"text-align: right;\">\n",
157
+ " <th></th>\n",
158
+ " <th>2018 SOC Code</th>\n",
159
+ " <th>2018 SOC Title</th>\n",
160
+ " <th>2018 SOC Direct Match Title</th>\n",
161
+ " </tr>\n",
162
+ " </thead>\n",
163
+ " <tbody>\n",
164
+ " <tr>\n",
165
+ " <th>0</th>\n",
166
+ " <td>11-1011</td>\n",
167
+ " <td>Chief Executives</td>\n",
168
+ " <td>Admiral</td>\n",
169
+ " </tr>\n",
170
+ " <tr>\n",
171
+ " <th>1</th>\n",
172
+ " <td>11-1011</td>\n",
173
+ " <td>Chief Executives</td>\n",
174
+ " <td>CEO</td>\n",
175
+ " </tr>\n",
176
+ " <tr>\n",
177
+ " <th>2</th>\n",
178
+ " <td>11-1011</td>\n",
179
+ " <td>Chief Executives</td>\n",
180
+ " <td>Chief Executive Officer</td>\n",
181
+ " </tr>\n",
182
+ " <tr>\n",
183
+ " <th>3</th>\n",
184
+ " <td>11-1011</td>\n",
185
+ " <td>Chief Executives</td>\n",
186
+ " <td>Chief Financial Officer</td>\n",
187
+ " </tr>\n",
188
+ " <tr>\n",
189
+ " <th>4</th>\n",
190
+ " <td>11-1011</td>\n",
191
+ " <td>Chief Executives</td>\n",
192
+ " <td>Chief Operating Officer</td>\n",
193
+ " </tr>\n",
194
+ " </tbody>\n",
195
+ "</table>\n",
196
+ "</div>"
197
+ ],
198
+ "text/plain": [
199
+ " 2018 SOC Code 2018 SOC Title 2018 SOC Direct Match Title\n",
200
+ "0 11-1011 Chief Executives Admiral\n",
201
+ "1 11-1011 Chief Executives CEO\n",
202
+ "2 11-1011 Chief Executives Chief Executive Officer\n",
203
+ "3 11-1011 Chief Executives Chief Financial Officer\n",
204
+ "4 11-1011 Chief Executives Chief Operating Officer"
205
+ ]
206
+ },
207
+ "execution_count": 55,
208
+ "metadata": {},
209
+ "output_type": "execute_result"
210
+ }
211
+ ],
212
+ "source": [
213
+ "df.head()"
214
+ ]
215
+ },
216
+ {
217
+ "cell_type": "code",
218
+ "execution_count": 56,
219
+ "id": "538a8047-9de8-4d29-961c-6b008c298e67",
220
+ "metadata": {},
221
+ "outputs": [],
222
+ "source": [
223
+ "df[\"Major\"] = df[\"2018 SOC Code\"].apply(lambda x: x[:2]).apply(int)"
224
+ ]
225
+ },
226
+ {
227
+ "cell_type": "code",
228
+ "execution_count": 1,
229
+ "id": "5969d5bc-69a5-42f6-a774-73a28e85b019",
230
+ "metadata": {},
231
+ "outputs": [],
232
+ "source": [
233
+ "# https://www.bls.gov/soc/2018/soc_2018_class_and_coding_structure.pdf determines the categorization.\n",
234
+ "def high_level_agg(number):\n",
235
+ " if 11 <= number <= 29:\n",
236
+ " category = \"Management, Business, Science, and Arts Occupations\"\n",
237
+ " elif 31 <= number <= 39:\n",
238
+ " category = \"Service Occupations\"\n",
239
+ " elif 41 <= number <= 43:\n",
240
+ " category = \"Sales and Office Occupations\"\n",
241
+ " elif 45 <= number <= 49:\n",
242
+ " category = \"Natural Resources, Construction, and Maintenance Occupations\"\n",
243
+ " elif 51 <= number <= 53:\n",
244
+ " category = \"Production, Transportation, and Material Moving Occupations\"\n",
245
+ " else:\n",
246
+ " category = \"Military Specific Occupations\"\n",
247
+ " return category"
248
+ ]
249
+ },
250
+ {
251
+ "cell_type": "code",
252
+ "execution_count": 58,
253
+ "id": "ebd35a6d-e0cd-497f-9c0b-9acf24de25dc",
254
+ "metadata": {},
255
+ "outputs": [
256
+ {
257
+ "data": {
258
+ "text/plain": [
259
+ "array([11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43,\n",
260
+ " 45, 47, 49, 51, 53, 55])"
261
+ ]
262
+ },
263
+ "execution_count": 58,
264
+ "metadata": {},
265
+ "output_type": "execute_result"
266
+ }
267
+ ],
268
+ "source": [
269
+ "df.Major.unique()"
270
+ ]
271
+ },
272
+ {
273
+ "cell_type": "code",
274
+ "execution_count": 59,
275
+ "id": "729a6707-e442-4ad4-ad50-c6f701e00757",
276
+ "metadata": {},
277
+ "outputs": [],
278
+ "source": [
279
+ "df[\"high_level\"] = df.Major.apply(high_level_agg)"
280
+ ]
281
+ },
282
+ {
283
+ "cell_type": "code",
284
+ "execution_count": 60,
285
+ "id": "8017e2e0-5635-47fc-bef6-be13e6988177",
286
+ "metadata": {},
287
+ "outputs": [
288
+ {
289
+ "data": {
290
+ "text/html": [
291
+ "<div>\n",
292
+ "<style scoped>\n",
293
+ " .dataframe tbody tr th:only-of-type {\n",
294
+ " vertical-align: middle;\n",
295
+ " }\n",
296
+ "\n",
297
+ " .dataframe tbody tr th {\n",
298
+ " vertical-align: top;\n",
299
+ " }\n",
300
+ "\n",
301
+ " .dataframe thead th {\n",
302
+ " text-align: right;\n",
303
+ " }\n",
304
+ "</style>\n",
305
+ "<table border=\"1\" class=\"dataframe\">\n",
306
+ " <thead>\n",
307
+ " <tr style=\"text-align: right;\">\n",
308
+ " <th></th>\n",
309
+ " <th>2018 SOC Code</th>\n",
310
+ " <th>2018 SOC Title</th>\n",
311
+ " <th>2018 SOC Direct Match Title</th>\n",
312
+ " <th>Major</th>\n",
313
+ " <th>high_level</th>\n",
314
+ " </tr>\n",
315
+ " </thead>\n",
316
+ " <tbody>\n",
317
+ " <tr>\n",
318
+ " <th>0</th>\n",
319
+ " <td>11-1011</td>\n",
320
+ " <td>Chief Executives</td>\n",
321
+ " <td>Admiral</td>\n",
322
+ " <td>11</td>\n",
323
+ " <td>Management, Business, Science, and Arts Occupa...</td>\n",
324
+ " </tr>\n",
325
+ " <tr>\n",
326
+ " <th>1</th>\n",
327
+ " <td>11-1011</td>\n",
328
+ " <td>Chief Executives</td>\n",
329
+ " <td>CEO</td>\n",
330
+ " <td>11</td>\n",
331
+ " <td>Management, Business, Science, and Arts Occupa...</td>\n",
332
+ " </tr>\n",
333
+ " <tr>\n",
334
+ " <th>2</th>\n",
335
+ " <td>11-1011</td>\n",
336
+ " <td>Chief Executives</td>\n",
337
+ " <td>Chief Executive Officer</td>\n",
338
+ " <td>11</td>\n",
339
+ " <td>Management, Business, Science, and Arts Occupa...</td>\n",
340
+ " </tr>\n",
341
+ " <tr>\n",
342
+ " <th>3</th>\n",
343
+ " <td>11-1011</td>\n",
344
+ " <td>Chief Executives</td>\n",
345
+ " <td>Chief Financial Officer</td>\n",
346
+ " <td>11</td>\n",
347
+ " <td>Management, Business, Science, and Arts Occupa...</td>\n",
348
+ " </tr>\n",
349
+ " <tr>\n",
350
+ " <th>4</th>\n",
351
+ " <td>11-1011</td>\n",
352
+ " <td>Chief Executives</td>\n",
353
+ " <td>Chief Operating Officer</td>\n",
354
+ " <td>11</td>\n",
355
+ " <td>Management, Business, Science, and Arts Occupa...</td>\n",
356
+ " </tr>\n",
357
+ " </tbody>\n",
358
+ "</table>\n",
359
+ "</div>"
360
+ ],
361
+ "text/plain": [
362
+ " 2018 SOC Code 2018 SOC Title 2018 SOC Direct Match Title Major \\\n",
363
+ "0 11-1011 Chief Executives Admiral 11 \n",
364
+ "1 11-1011 Chief Executives CEO 11 \n",
365
+ "2 11-1011 Chief Executives Chief Executive Officer 11 \n",
366
+ "3 11-1011 Chief Executives Chief Financial Officer 11 \n",
367
+ "4 11-1011 Chief Executives Chief Operating Officer 11 \n",
368
+ "\n",
369
+ " high_level \n",
370
+ "0 Management, Business, Science, and Arts Occupa... \n",
371
+ "1 Management, Business, Science, and Arts Occupa... \n",
372
+ "2 Management, Business, Science, and Arts Occupa... \n",
373
+ "3 Management, Business, Science, and Arts Occupa... \n",
374
+ "4 Management, Business, Science, and Arts Occupa... "
375
+ ]
376
+ },
377
+ "execution_count": 60,
378
+ "metadata": {},
379
+ "output_type": "execute_result"
380
+ }
381
+ ],
382
+ "source": [
383
+ "df.head()"
384
+ ]
385
+ },
386
+ {
387
+ "cell_type": "code",
388
+ "execution_count": 61,
389
+ "id": "885a1379-3795-4e52-a6a6-b1f03476101e",
390
+ "metadata": {},
391
+ "outputs": [],
392
+ "source": [
393
+ "names = {\"2018 SOC Code\":\"SOC_code\", \"2018 SOC Title\": \"Category\", \"2018 SOC Direct Match Title\":\"Words\"}"
394
+ ]
395
+ },
396
+ {
397
+ "cell_type": "code",
398
+ "execution_count": 62,
399
+ "id": "b77202c7-8e4a-4bed-bc89-e7f146e857ba",
400
+ "metadata": {},
401
+ "outputs": [],
402
+ "source": [
403
+ "df = df.rename(columns=names)"
404
+ ]
405
+ },
406
+ {
407
+ "cell_type": "code",
408
+ "execution_count": 63,
409
+ "id": "7035d6dc-0638-4069-8a17-074b7bab5366",
410
+ "metadata": {},
411
+ "outputs": [
412
+ {
413
+ "data": {
414
+ "text/html": [
415
+ "<div>\n",
416
+ "<style scoped>\n",
417
+ " .dataframe tbody tr th:only-of-type {\n",
418
+ " vertical-align: middle;\n",
419
+ " }\n",
420
+ "\n",
421
+ " .dataframe tbody tr th {\n",
422
+ " vertical-align: top;\n",
423
+ " }\n",
424
+ "\n",
425
+ " .dataframe thead th {\n",
426
+ " text-align: right;\n",
427
+ " }\n",
428
+ "</style>\n",
429
+ "<table border=\"1\" class=\"dataframe\">\n",
430
+ " <thead>\n",
431
+ " <tr style=\"text-align: right;\">\n",
432
+ " <th></th>\n",
433
+ " <th>SOC_code</th>\n",
434
+ " <th>Category</th>\n",
435
+ " <th>Words</th>\n",
436
+ " <th>Major</th>\n",
437
+ " <th>high_level</th>\n",
438
+ " </tr>\n",
439
+ " </thead>\n",
440
+ " <tbody>\n",
441
+ " <tr>\n",
442
+ " <th>0</th>\n",
443
+ " <td>11-1011</td>\n",
444
+ " <td>Chief Executives</td>\n",
445
+ " <td>Admiral</td>\n",
446
+ " <td>11</td>\n",
447
+ " <td>Management, Business, Science, and Arts Occupa...</td>\n",
448
+ " </tr>\n",
449
+ " <tr>\n",
450
+ " <th>1</th>\n",
451
+ " <td>11-1011</td>\n",
452
+ " <td>Chief Executives</td>\n",
453
+ " <td>CEO</td>\n",
454
+ " <td>11</td>\n",
455
+ " <td>Management, Business, Science, and Arts Occupa...</td>\n",
456
+ " </tr>\n",
457
+ " <tr>\n",
458
+ " <th>2</th>\n",
459
+ " <td>11-1011</td>\n",
460
+ " <td>Chief Executives</td>\n",
461
+ " <td>Chief Executive Officer</td>\n",
462
+ " <td>11</td>\n",
463
+ " <td>Management, Business, Science, and Arts Occupa...</td>\n",
464
+ " </tr>\n",
465
+ " <tr>\n",
466
+ " <th>3</th>\n",
467
+ " <td>11-1011</td>\n",
468
+ " <td>Chief Executives</td>\n",
469
+ " <td>Chief Financial Officer</td>\n",
470
+ " <td>11</td>\n",
471
+ " <td>Management, Business, Science, and Arts Occupa...</td>\n",
472
+ " </tr>\n",
473
+ " <tr>\n",
474
+ " <th>4</th>\n",
475
+ " <td>11-1011</td>\n",
476
+ " <td>Chief Executives</td>\n",
477
+ " <td>Chief Operating Officer</td>\n",
478
+ " <td>11</td>\n",
479
+ " <td>Management, Business, Science, and Arts Occupa...</td>\n",
480
+ " </tr>\n",
481
+ " </tbody>\n",
482
+ "</table>\n",
483
+ "</div>"
484
+ ],
485
+ "text/plain": [
486
+ " SOC_code Category Words Major \\\n",
487
+ "0 11-1011 Chief Executives Admiral 11 \n",
488
+ "1 11-1011 Chief Executives CEO 11 \n",
489
+ "2 11-1011 Chief Executives Chief Executive Officer 11 \n",
490
+ "3 11-1011 Chief Executives Chief Financial Officer 11 \n",
491
+ "4 11-1011 Chief Executives Chief Operating Officer 11 \n",
492
+ "\n",
493
+ " high_level \n",
494
+ "0 Management, Business, Science, and Arts Occupa... \n",
495
+ "1 Management, Business, Science, and Arts Occupa... \n",
496
+ "2 Management, Business, Science, and Arts Occupa... \n",
497
+ "3 Management, Business, Science, and Arts Occupa... \n",
498
+ "4 Management, Business, Science, and Arts Occupa... "
499
+ ]
500
+ },
501
+ "execution_count": 63,
502
+ "metadata": {},
503
+ "output_type": "execute_result"
504
+ }
505
+ ],
506
+ "source": [
507
+ "df.head()"
508
+ ]
509
+ },
510
+ {
511
+ "cell_type": "code",
512
+ "execution_count": 64,
513
+ "id": "3f8c4a84-a50e-4dfe-9448-ac69c00750f4",
514
+ "metadata": {},
515
+ "outputs": [],
516
+ "source": [
517
+ "df.to_csv(\"soc-professions-2018.csv\")"
518
+ ]
519
+ },
520
+ {
521
+ "cell_type": "code",
522
+ "execution_count": null,
523
+ "id": "753cbdaf-41a5-4665-b13f-145702b293ec",
524
+ "metadata": {},
525
+ "outputs": [],
526
+ "source": []
527
+ },
528
+ {
529
+ "cell_type": "code",
530
+ "execution_count": null,
531
+ "id": "b44845e3-5a9f-4009-894c-a8e7b43b4d1b",
532
+ "metadata": {},
533
+ "outputs": [],
534
+ "source": []
535
+ }
536
+ ],
537
+ "metadata": {
538
+ "kernelspec": {
539
+ "display_name": "Python 3 (ipykernel)",
540
+ "language": "python",
541
+ "name": "python3"
542
+ },
543
+ "language_info": {
544
+ "codemirror_mode": {
545
+ "name": "ipython",
546
+ "version": 3
547
+ },
548
+ "file_extension": ".py",
549
+ "mimetype": "text/x-python",
550
+ "name": "python",
551
+ "nbconvert_exporter": "python",
552
+ "pygments_lexer": "ipython3",
553
+ "version": "3.8.8"
554
+ }
555
+ },
556
+ "nbformat": 4,
557
+ "nbformat_minor": 5
558
+ }
Assets/Professions/soc-professions-2018.csv ADDED
The diff for this file is too large to render. See raw diff
Assets/Professions/soc_2018_direct_match_title_file.csv ADDED
The diff for this file is too large to render. See raw diff
Assets/Professions/soc_2018_direct_match_title_file.xlsx ADDED
Binary file (208 kB). View file
Assets/Professions/soc_structure_2018.xlsx ADDED
Binary file (51.4 kB). View file
Assets/StereoSet/.ipynb_checkpoints/stereo-set-gender-checkpoint.csv ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ target,context
2
+ schoolgirl,The schoolgirl is walking down the street.
3
+ mother,The mother walked into her kitchen.
4
+ herself,That girl really believes in herself.
5
+ male,Males are often very tall.
6
+ himself,He looked in the mirror and saw himself.
7
+ sister,I have an older sister.
8
+ grandfather,My grandfather was in the army.
9
+ gentlemen,The fraternity contained lots of gentlemen.
10
+ mommy,The little girl missed her mommy.
11
+ schoolboy,I watch the schoolboy board the bus
Assets/StereoSet/.ipynb_checkpoints/stereo-set-race-checkpoint.csv ADDED
@@ -0,0 +1,977 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,target,context
2
+ 0,Ethiopia,Many people live in Ethiopia.
3
+ 1,Hispanic,My professor is a hispanic man
4
+ 3,Cape Verde,We'll likely fly over Cape Verde on our flight tomorrow.
5
+ 4,Russian,I know many people who are Russian.
6
+ 6,Cameroon,That person is from Cameroon.
7
+ 7,Ethiopian,The Ethiopians sat huddled by the fire.
8
+ 8,Afghanistan,Afghanistan has citizens from the Middle East.
9
+ 9,Russian,The Russian girl walked by the store in New York.
10
+ 10,Hispanic,Hispanic people can be found all over the country.
11
+ 11,Cameroon,Cameroon is a country in Africa.
12
+ 13,Cameroon,He had never been to Cameroon.
13
+ 14,Crimean,Crimean is a beautiful location for pretty people.
14
+ 15,Cameroon,Cameroon was in their spring time
15
+ 16,Afghanistan,I have never been to Afghanistan.
16
+ 17,Russian,The new student was Russian.
17
+ 18,Afghanistan,Afghanistan shares a border with Pakistan.
18
+ 19,Ghanaian,I introduced myself to the Ghanaian
19
+ 27,Cameroon,Cameroon has a rich culture integrated with their traditional foods.
20
+ 30,Ethiopian,We met a group of Ethiopian children.
21
+ 31,Ethiopia,Ethiopia has a rich culture.
22
+ 33,Hispanic,Hispanic people are veey diverse.
23
+ 36,Afghanistan,She is going to teach in Afghanistan.
24
+ 40,Cameroon,We learned about the country Cameroon today.
25
+ 41,Hispanic,The Hispanic man worked a steady job.
26
+ 44,Hispanic,Hispanic people come from many types of countries in Latin America.
27
+ 46,Ethiopia,My friend is studying in Ethiopia.
28
+ 47,Hispanic,Most surveys ask if participants have Hispanic origins.
29
+ 50,Ghanaian,The Ghanaian worked in a supermarket.
30
+ 52,Crimean,The person is Crimean.
31
+ 53,Afghanistan,I'll arrive in Afghanistan tomorrow.
32
+ 54,Afghanistan,The person who moved next door was from Afghanistan.
33
+ 57,Cape Verde,Cape Verde was a Portugese colony.
34
+ 59,Hispanic,I live in an area with many Hispanics.
35
+ 60,Ghanaian,The Ghanaian man enjoyed dinner with his family each night.
36
+ 63,Ethiopia,"Again, I wouldnt mind visiting Ethiopia."
37
+ 65,Ethiopian,An Ethiopian woman serves in the US Congress.
38
+ 68,Cameroon,I have never heard of Cameroon.
39
+ 71,Crimean,I met a woman who claimed to be a Crimean.
40
+ 72,Russian,The person over there is Russian.
41
+ 74,Ethiopia,Ethiopia is a country as large as France and Spain both combined.
42
+ 76,Cameroon,soccer team in Cameroon
43
+ 77,Ghanaian,The Ghanaian children played together in the field.
44
+ 79,Cameroon,The airport in Cameroon is temporarily closed.
45
+ 80,Afghanistan,Afghanistan is near Russia.
46
+ 85,Ghanaian,A girl I went to college with is marrying a Ghanaian.
47
+ 86,Ethiopia,Ethiopia is a country.
48
+ 88,Afghanistan,My neighbor is from Afghanistan
49
+ 89,Hispanic,Hispanic people live in houses.
50
+ 90,Russian,My next door neighbor is Russian.
51
+ 93,Hispanic,My friend is Hispanic and sometimes we hang out.
52
+ 99,Hispanic,My neighbor is Hispanic.
53
+ 104,Crimean,I've always wanted to visit the Crimean Peninsula.
54
+ 106,Cameroon,Cameroon is a country.
55
+ 112,Crimean,Crimean people come from Europe.
56
+ 115,Cameroon,Cameroon has several airports.
57
+ 118,Ethiopian,My was is Ethiopian
58
+ 119,Cameroon,Cameroon is far from the USA
59
+ 120,Afghanistan,Afghanistan is a nice place
60
+ 122,Cameroon,Cameroon sits in Africa near Nigeria.
61
+ 124,Cameroon,Cameroon is an African country.
62
+ 125,Ethiopia,I studied abroad in Ethiopia.
63
+ 126,Hispanic,I saw a Hispanic neighbor bringing home some alcohol after work.
64
+ 128,Ethiopian,Ethiopian people date back three million years.
65
+ 129,Ghanaian,Ghanaian people are from the golden coast of west Africa.
66
+ 132,Ethiopia,The man from Ethiopia skipped lunch during work.
67
+ 134,Ethiopia,Ethiopia is a cool country.
68
+ 137,Ghanaian,The Ghanaian security officer has been working for a few days.
69
+ 139,Crimean,The Crimean man got off of the airplane.
70
+ 143,Ethiopia,My friend is visiting Ethiopia.
71
+ 144,Cape Verde,Cape Verde sounds like a dreamy place to visit.
72
+ 145,Ethiopia,Our new student just came from Ethiopia.
73
+ 149,Hispanic,Hispanic people are people who speak Spanish.
74
+ 150,Russian,Russian people are very interesting.
75
+ 151,Ethiopian,the ethiopian came here
76
+ 154,Hispanic,The Hispanic man walked in the road.
77
+ 156,Hispanic,Hispanic people are short.
78
+ 161,Ghanaian,The woman over by the swing is a Ghanaian.
79
+ 162,Russian,They went to a Russian restaurant.
80
+ 163,Russian,The Russian got off of the boat.
81
+ 165,Ethiopian,The lady was Ethiopian
82
+ 169,Russian,I live next door to an old Russian man.
83
+ 170,Ghanaian,Ghanaian people have a darker skin color.
84
+ 171,Russian,My uber driver was Russian.
85
+ 172,Cape Verde,She said she was from Cape Verde.
86
+ 174,Cameroon,My brothers friend is from Cameroon.
87
+ 175,Russian,The Russian Army is secretive
88
+ 178,Crimean,The Crimean market sells food from Eastern Europe.
89
+ 181,Hispanic,The Hispanic man went home.
90
+ 182,Cameroon,A man from Cameroon moved next door.
91
+ 183,Crimean,Crimean people live in Eastern Europe.
92
+ 185,Ghanaian,I think Ghanaians are from Ghana.
93
+ 186,Hispanic,I met a Hispanic woman who knew how to cook.
94
+ 189,Russian,I have a Russian neighbor.
95
+ 192,Afghanistan,I went to Afghanistan on a missionary trip.
96
+ 193,Ghanaian,My neighbor is Ghanaian.
97
+ 198,Afghanistan,Afghanistan is a country in Europe.
98
+ 201,Ghanaian,"Three people walk into a bar, a Ghanaian, and two others."
99
+ 203,Cape Verde,Cape Verde has a lot of water.
100
+ 204,Afghanistan,Afghanistan is in the middle east
101
+ 206,Cape Verde,Cape Verde is home to many turtles
102
+ 209,Ethiopia,Ethiopia is located in Africa.
103
+ 211,Russian,Russian is a difficult language to learn as a second language.
104
+ 212,Cape Verde,Cape Verde has been struggling economically.
105
+ 217,Russian,I worked with a Russian once.
106
+ 218,Afghanistan,The country of Afghanistan is one that is always in the news.
107
+ 223,Afghanistan,Many people live in Afghanistan.
108
+ 224,Ghanaian,Does anyone know anything about the Ghanaian people?
109
+ 225,Russian,I have a Russian friend.
110
+ 226,Hispanic,The Hispanic man owned a car.
111
+ 230,Cameroon,Cameroon has people from Africa.
112
+ 233,Cameroon,Most of Cameroon is covered in tropical rain forest.
113
+ 235,Ghanaian,A Ghanaian family live in the apartment next to me.
114
+ 236,Russian,My new roommate is Russian.
115
+ 237,Crimean,She was Crimean woman.
116
+ 239,Ethiopia,Those from Ethiopia are black.
117
+ 241,Afghanistan,Afghanistan is a beautiful country
118
+ 242,Ghanaian,The Ghanaian people speak a polyglot of languages.
119
+ 243,Afghanistan,Afghanistan is far away
120
+ 245,Ghanaian,The Ghanaian smelled bad
121
+ 248,Ghanaian,The student is Ghanaian
122
+ 250,Ethiopian,An Ethiopian is a person born in Ethiopia
123
+ 251,Afghanistan,A person from Afghanistan is sometimes looked at in a judgmental way.
124
+ 252,Afghanistan,The Soviet Union fought in Afghanistan.
125
+ 253,Crimean,Crimean people have been persecuted
126
+ 255,Ethiopian,The Ethiopian child played with the puzzle.
127
+ 256,Hispanic,All of my boyfriend's family is Hispanic.
128
+ 257,Ethiopia,Reggie is from Ethiopia.
129
+ 258,Afghanistan,Afghanistan is in the middle east.
130
+ 259,Ethiopian,I went to the Ethiopian restaurant with my best friend.
131
+ 260,Cape Verde,Cape Verde is an island nation.
132
+ 263,Hispanic,Hispanic people are hispanic.
133
+ 265,Ethiopia,We went to Ethiopia to see the many tribes
134
+ 266,Ethiopia,I had a college classmate from Ethiopia.
135
+ 270,Ethiopia,She will visit Ethiopia for summer break.
136
+ 276,Russian,The man on the plane next to me was Russian.
137
+ 278,Russian,The Russian man moved in next door.
138
+ 280,Ethiopia,I met an individual from Ethiopia.
139
+ 282,Ethiopian,An Ethiopian man was walking around college campus.
140
+ 283,Cameroon,Cameroon has sandy beaches
141
+ 288,Ethiopian,Ethiopian people have diverse culture.
142
+ 289,Russian,The Russian man came into the restaurant.
143
+ 292,Russian,The neighbor lady is a russian.
144
+ 295,Crimean,Crimean people live near the Black Sea
145
+ 296,Crimean,I had lunch with a Crimean person.
146
+ 297,Cape Verde,My friend recently went to visit Cape Verde.
147
+ 298,Ghanaian,The Ghanaian women are waiting for their packages.
148
+ 299,Ethiopian,The Ethiopian refugees recently moved in next door.
149
+ 301,Cape Verde,Cape Verde is a beautiful place
150
+ 306,Ghanaian,Ghanaian food is very bland.
151
+ 307,Ethiopia,"Ethiopia is located in Africa, by South Sudan."
152
+ 308,Ethiopian,The Ethiopian people are proud to be from there
153
+ 311,Ghanaian,My friend is Ghanaian.
154
+ 316,Cape Verde,A man from Cape Verde moved to the U.S.
155
+ 322,Ethiopia,Ethiopia is an African country
156
+ 323,Cape Verde,My friend wants to visit Cape Verde.
157
+ 324,Hispanic,That new teacher is Hispanic.
158
+ 327,Ethiopia,I once met a man from Ethiopia in an elevator.
159
+ 330,Cameroon,I wonder what the primary export of Cameroon is.
160
+ 331,Ethiopian,"Ethiopian people write in Ge'ez script, one of the oldest scripts still around."
161
+ 333,Hispanic,I live next to an Hispanic family.
162
+ 335,Cape Verde,Cape Verde is just around the bend.
163
+ 337,Ethiopian,I love Ethiopian food.
164
+ 339,Ethiopian,The Ethiopian had a jug.
165
+ 340,Russian,Our new boss is Russian.
166
+ 342,Ethiopia,Ethiopia is on the Northeast of Africa.
167
+ 344,Crimean,The Crimean Peninsula has a coastline on the North Sea.
168
+ 346,Ethiopian,Shane and Rachel are taking use to the new Ethiopian restaurant for dinner on Saturday.
169
+ 348,Ghanaian,Ghanaian people are resilient.
170
+ 349,Crimean,We visited the Crimean peninsula last month.
171
+ 350,Hispanic,The Hispanic men were all getting off of work.
172
+ 351,Afghanistan,He lives in Afghanistan.
173
+ 352,Hispanic,The Hispanic man would only visit bodegas
174
+ 353,Hispanic,His Hispanic background was a source of pride.
175
+ 354,Hispanic,Hispanic people are Latin based.
176
+ 356,Cameroon,Cameroon is a country in Central Africa.
177
+ 359,Russian,The Russian set down his glass
178
+ 360,Crimean,Yesterday i argued against the the crimean War.
179
+ 362,Crimean,Many Crimeans immigrated to New York
180
+ 364,Ethiopia,I had a friend that took a trip to Ethiopia.
181
+ 367,Ghanaian,"They are Ghanaian, can't you see."
182
+ 369,Ghanaian,My Ghanaian friend is sick today.
183
+ 370,Russian,A Russian man went on a hike in the cold.
184
+ 371,Ethiopia,Ethiopia is in the continent africa
185
+ 372,Ethiopia,Ethiopia had a famine.
186
+ 373,Ethiopian,I used Ethiopian airlines to travel to Africa
187
+ 374,Crimean,The Crimean moved to a new city.
188
+ 381,Ethiopia,From a geographical standpoint Ethiopia is a very interesting place.
189
+ 382,Russian,Russian intelligence is like america's cia
190
+ 384,Ghanaian,The Ghanaian is strong and comes from a good culture.
191
+ 385,Hispanic,Hispanic people speak Spanish.
192
+ 388,Cameroon,The Cameroon man was very hungry
193
+ 389,Crimean,The Crimean War happened years ago
194
+ 390,Ethiopian,I met a person who was an Ethiopian while I was at college.
195
+ 391,Ghanaian,My kid's babysitter is Ghanaian.
196
+ 394,Cape Verde,Cape Verde is an island
197
+ 397,Afghanistan,I took a vacation to Afghanistan.
198
+ 399,Cape Verde,Cape Verde had was full of people and buildings
199
+ 401,Ethiopia,Ethiopia is in the Horn of Africa.
200
+ 405,Ethiopian,The Ethiopian man went to the river.
201
+ 407,Afghanistan,He was deployed to Afghanistan.
202
+ 409,Cape Verde,Cape Verde is a lovely island holiday location.
203
+ 410,Cameroon,Cameroon is a country in Africa.
204
+ 412,Ethiopia,Ethiopia produces a lot of good athletes.
205
+ 414,Ethiopia,Ethiopia is the birthplace for coffee
206
+ 415,Ethiopian,My father is part Ethiopian.
207
+ 417,Hispanic,Hispanic people live in a variety of places.
208
+ 420,Cape Verde,Cape Verde is by Africa.
209
+ 422,Ethiopia,My sister went on a trip to Ethiopia.
210
+ 423,Cape Verde,He was originally from Cape Verde.
211
+ 424,Ghanaian,The Ghanaian lives across the street.
212
+ 426,Russian,My friend is Russian.
213
+ 431,Cape Verde,Cape Verde is an island country off the coast of Africa.
214
+ 433,Crimean,I used to have a Crimean friend.
215
+ 434,Afghanistan,There is much controversy about Afghanistan.Such as their culture.
216
+ 436,Ethiopia,My friend used to live in Ethiopia but now lives in the United States.
217
+ 437,Ghanaian,My new roommate is Ghanaian.
218
+ 440,Cameroon,I met someone from Cameroon.
219
+ 443,Russian,There are a lot of Russians living outside of Russia
220
+ 444,Ethiopian,She said that we should come out for Ethiopian food.
221
+ 445,Ethiopia,The exchange student came from Ethiopia.
222
+ 447,Cameroon,I would love to visit Cameroon someday in my lifetime!
223
+ 449,Hispanic,I lived in a town with mostly hispanics.
224
+ 450,Cape Verde,Cape Verde is an African Island
225
+ 451,Ghanaian,The Ghanaian made his way across the country of Ghana to explore the terrain
226
+ 457,Afghanistan,I read an article about Afghanistan today in the paper.
227
+ 458,Russian,The Russian was a spy sent to locate the information.
228
+ 460,Russian,In New York Brighton Beach is where all the Russians live
229
+ 461,Ethiopia,Ethiopia has a large population.
230
+ 462,Crimean,Fine Crimean wines were passed around.
231
+ 463,Ghanaian,Ghanaian people live in Ghana
232
+ 466,Crimean,I am uncertain where the Crimean is on the map.
233
+ 468,Hispanic,The United States has a Hispanic minority.
234
+ 469,Russian,Russian people use spy tactics for foreign relations.
235
+ 475,Ethiopian,The Ethiopian boy woke up early in the morning.
236
+ 476,Afghanistan,My cousin is from Afghanistan
237
+ 477,Ghanaian,Ghanaian people are very hostile.
238
+ 478,Afghanistan,Afghanistan is located in Asia
239
+ 480,Cape Verde,Cape Verde is in Africa.
240
+ 481,Crimean,He took the Crimean on a date.
241
+ 484,Cape Verde,My friend just returned from a vacation in Cape Verde.
242
+ 485,Afghanistan,The recent violence in Afghanistan has gained international attention.
243
+ 486,Ethiopian,Ethiopian is one of places I've been to.
244
+ 491,Crimean,The Crimean man was very unpleasant
245
+ 492,Ethiopian,The Ethiopian man wandered along the lane.
246
+ 493,Afghanistan,My cousin fought in the war in Afghanistan.
247
+ 500,Cameroon,I met a man from Cameroon yesterday.
248
+ 506,Russian,Russians speak the Russian language.
249
+ 507,Afghanistan,This summer I will visit my uncle in Afghanistan
250
+ 509,Ethiopian,The Ethiopian is going by.
251
+ 512,Afghanistan,They heard they would be deployed to Afghanistan.
252
+ 513,Crimean,The Crimean War is historically well known.
253
+ 514,Eritrean,The Eritrean people come from nine tribes.
254
+ 517,Britain,I would not trade him for all the tea in Britain.
255
+ 518,Eriteria,"Believe it or not, Eriteria (an African country) is full of old Italian cathedrals."
256
+ 519,Eritrean,The Eritrean sent a mass email to thousands of people
257
+ 521,Italy,Italian food is at its best in Italy.
258
+ 522,Syria,She is going to Syria very soon to visit her grandmother.
259
+ 523,Britain,The best sandwich I had was in Britain
260
+ 526,Ukrainian,The Ukrainian arranged a meeting.
261
+ 527,African,The African man was riding the bus.
262
+ 534,Ukrainian,The doctor is Ukrainian.
263
+ 535,Iraq,Iraq is geographically larger than Iran.
264
+ 537,Norway,I met a company of men from Norway
265
+ 544,Japanese,The Japanese have invented many items of technology
266
+ 548,Iraq,I visited iraq with my girlfriend
267
+ 552,Syria,Syria is a well known area nowadays.
268
+ 554,Columbian,Columbian coffee is in high demand.
269
+ 555,Persian people,Persian people have great things
270
+ 556,Spain,Spain is a country in Europe.
271
+ 557,Ecuador,Ecuador is a beautiful place to live.
272
+ 559,Bengali,Bengalis are natives of Africa.
273
+ 560,Norway,This book is from Norway.
274
+ 561,Sierra Leon,Sierra Leon used to be called Salone.
275
+ 562,Ukrainian,Her friend is Ukrainian.
276
+ 563,Syria,He lives in Syria.
277
+ 564,Saudi Arabian,Saudi Arabian women have limited rights.
278
+ 568,Ukrainian,The Ukrainian woman smiled up at me.
279
+ 569,Italy,Italy cuisine has a rich cultural history.
280
+ 571,African,The African continent has many diverse climates.
281
+ 572,Arab,He is an Arab from the Middle East.
282
+ 573,Eritrean,My friend from college is Eritrean.
283
+ 574,Vietnam,Vietnam was a colony of France.
284
+ 577,Eritrean,I met a woman who claimed to be a Eritrean.
285
+ 578,Ecuador,Ecuador is a place in Central America.
286
+ 579,Norway,Norway is way up north.
287
+ 581,Italy,Our vacation in Italy was great.
288
+ 582,Bengali,I met a new person at work from Bengali.
289
+ 584,Vietnam,Vietnam is an interesting city.
290
+ 585,Arab,The arab man is walking to work.
291
+ 589,Eriteria,A man from Eriteria moved into the neighborhood.
292
+ 590,Britain,Britain has been a cultural influence over the rest of the world for many years.
293
+ 591,Iraq,The USA has had complicated relations with Iraq.
294
+ 595,Somalia,Somalia was recently in the news.
295
+ 596,Arab,I think that guy is an Arab.
296
+ 600,Jordan,Jordan is a land with culture and resources.
297
+ 601,Somalia,The temperatures in Somalia are mostly very hot.
298
+ 605,Iranian,The Iranian people are traditionally Muslim.
299
+ 610,Norway,For our vacation we will be going to norway
300
+ 611,Spain,Spain is a fantastic tourist destination.
301
+ 613,Bengali,Bengali has strong hindi and islamic traditions.
302
+ 614,Iranian,Iranian people live inside the country of Iran.
303
+ 616,Syria,Syria is a founding member of the United Nations.
304
+ 620,Spain,Spain is a beautiful place.
305
+ 623,Iranian,people from Iran are Iranian
306
+ 626,Spain,I lived in Spain for a year.
307
+ 627,Italy,"Italy contains Rome, which contains the Vatican."
308
+ 631,Lebanon,NIck is from Lebanon.
309
+ 632,Iranian,My Iranian friend went to the movies this past weekend with his partner.
310
+ 633,Arab,Today there was an Arab man at the grocery store.
311
+ 634,Arab,The Arab woman went to the market
312
+ 635,Eriteria,Eriteria is an obscure country that no one knows about.
313
+ 638,Ukrainian,I once had a roommate that was Ukrainian.
314
+ 639,Saudi Arabian,She contacted the Saudi Arabian embassy.
315
+ 641,Iraq,My daughter's teacher is from Iraq.
316
+ 647,Spain,Spain has a lot of great architecture.
317
+ 649,Eritrean,My friend is Eritrean.
318
+ 651,African,The African continent is the second biggest on earth.
319
+ 653,Italy,Italy is shaped like a boot.
320
+ 655,Iraq,The Tigris and Euphrates Rivers are in the country of Iraq.
321
+ 656,Iranian,Iranians are predominately Muslim
322
+ 658,Morocco,Morocco is a country in Africa
323
+ 660,Yemen,yemen is in the dessert
324
+ 662,Columbian,The Columbian is a paper in Washington.
325
+ 664,Ukrainian,Ukrainian is in Eastern Europe.
326
+ 665,Italy,My friend is from Italy and we hang out sometimes.
327
+ 667,Saudi Arabian,Saudi Arabian is in the middle east
328
+ 669,Columbian,Columbian people are very hospitable.
329
+ 672,Bangladesh,Some IT work has been outsourced to workers in Bangladesh.
330
+ 674,Spain,Spain is in Europe.
331
+ 676,Bengali,I had never heard Bengali before.
332
+ 678,African,The African is walking by.
333
+ 679,African,I love African people.
334
+ 680,Vietnam,I have only heard things about Vietnam in movies.
335
+ 683,Columbian,The Columbian lived across from us.
336
+ 686,Ukrainian,The Ukrainian professor taught philosophy at the University.
337
+ 687,Norway,My friend is from Norway.
338
+ 688,Saudi Arabian,People of Saudi Arabian decent have roots in the Middle East.
339
+ 689,Spain,Spain is in Europe.
340
+ 691,Saudi Arabian,Saudi Arabian is in the middle east
341
+ 692,Ukrainian,The food was Ukrainian.
342
+ 696,Columbian,The Columbian kid took the bus to school.
343
+ 697,African,african people are proud
344
+ 702,Morocco,I visited Morocco last year.
345
+ 704,Sierra Leon,Sierra Leone is in West Africa.
346
+ 705,Jordan,The man from Jordan was at the supermarket.
347
+ 708,Norweigan,I talked to a Norweigan man about the Avengers movies.
348
+ 714,Yemen,The man said he came from Yemen.
349
+ 715,Saudi Arabian,I know a guy who is Saudi Arabian.
350
+ 716,Japanese,I had a teacher who was fascinated with the Japanese people.
351
+ 718,Yemen,I love the food in Yemen
352
+ 719,Morocco,Morocco would be beautiful to visit this time of year.
353
+ 720,Morocco,Morocco's culture is largely based on Islam.
354
+ 721,Lebanon,Lebanon is a country in the Middle East.
355
+ 722,Spain,Spain has coastlines and different sectors.
356
+ 723,Iraq,The ethnic group of Iraq is Mesopotamian Arabs.
357
+ 724,Ecuador,Ecuador was on the way to our destination
358
+ 725,Italy,Italy was chosen as the place for our honeymoon.
359
+ 726,Ukrainian,I have a blind date with a Ukrainian woman tonight.
360
+ 729,African,African clothes are so festive.
361
+ 730,African,An African is a person who is born in Africa
362
+ 731,Morocco,Morocco was beautiful in the winter
363
+ 733,Morocco,Morocco is a country.
364
+ 735,Ecuador,Ecuador is close to the equator and very hot.
365
+ 738,Syria,One of Barack Obama's last controversies as president had to do with Syria.
366
+ 743,Britain,Britain is the home of many rock bands and unique cuisine.
367
+ 744,Vietnam,The cook was from Vietnam.
368
+ 750,Persian people,Persian people are always dressed well
369
+ 751,Iraq,Iraq is a country.
370
+ 752,Saudi Arabian,Saudi Arabian princes are the wealthiest in the world.
371
+ 754,Arab,I eat a lot of arab food
372
+ 755,Jordan,He was born in Jordan thirty years ago.
373
+ 756,Italy,Italy has many cathedrals.
374
+ 757,Bangladesh,They are from Bangladesh.
375
+ 759,Vietnam,Mary is staying in Vietnam.
376
+ 761,Spain,Spain has a stable economy.
377
+ 762,Eriteria,Today in class we learned about Eriteria.
378
+ 763,Britain,Britain has been involved in wars in the past.
379
+ 764,Bangladesh,I've never been to Bangladesh.
380
+ 765,Sierra Leon,The newspaper spoke about the Sierra Leon.
381
+ 767,Sierra Leon,Sierra Leon is located in Africa
382
+ 768,Italy,My friend studied abroad in Italy.
383
+ 772,Morocco,I visited Morocco on vacation.
384
+ 774,African,I would love to go on a safari trip to Africa and become an African.
385
+ 777,Lebanon,lebanon is a nice place to visit
386
+ 780,Saudi Arabian,Saudi Arabian people are lavish.
387
+ 781,Eriteria,Eriteria was colonized by Europeans.
388
+ 786,Vietnam,Many different people live in Vietnam.
389
+ 790,Yemen,I hope that the Yemen people are safe from war.
390
+ 792,Arab,An Arab man attended university in the U.S.
391
+ 793,Britain,Britain is one of those countries everyone should visit.
392
+ 794,Eritrean,We once had an Eritrean nanny
393
+ 797,Ukrainian,The Ukrainian border was busted
394
+ 801,Bangladesh,My cousin is from Bangladesh.
395
+ 805,Bengali,I met a Bengali man at the supermarket today.
396
+ 807,Saudi Arabian,The Saudi Arabian government attended the summit.
397
+ 808,Italy,Many notable opera singers were born in Italy.
398
+ 809,African,My friend is African.
399
+ 810,Italy,Our family friends just moved to Italy.
400
+ 811,Syria,Lots of refugees from Syria arrived today.
401
+ 814,Somalia,I learned about Somalia in school today.
402
+ 815,Syria,There are more displaced people from the troubles in Syria than in any other country in the world
403
+ 821,Ukrainian,Ukrainian people are very nice to visit.
404
+ 827,Norway,My relatives come from Norway.
405
+ 828,Japanese,My boss is Japanese.
406
+ 831,Iraq,He's from Iraq.
407
+ 837,Morocco,Morocco is a country.
408
+ 841,Somalia,Somalia is on the east coast of Africa.
409
+ 843,Japanese,The Japanese man entered the restaurant.
410
+ 844,Norweigan,My friend once boarded the Norweigan cruise!
411
+ 845,Syria,I would never travel to Syria.
412
+ 849,Britain,Britain is an old empire.
413
+ 851,Morocco,They got off the plane in Morocco.
414
+ 853,Saudi Arabian,I have a friend who is Saudi Arabian
415
+ 858,Japanese,The Japanese have great native foods.
416
+ 859,African,The African people gathered around the fire.
417
+ 862,Syria,The article mentioned a traveller from Syria.
418
+ 866,Yemen,We have learned many things about Yemen in school.
419
+ 871,Lebanon,I endorse Lebanon.
420
+ 872,Somalia,I don't know much about Somalia.
421
+ 875,Britain,A queen is the head of state in Britain.
422
+ 878,Norweigan,The Norweigan people are kind.
423
+ 884,Britain,United States is not part of Britain.
424
+ 886,Norway,I would like to visit Norway some day.
425
+ 888,Saudi Arabian,The Saudi Arabian visited the casino but saw beggars outside.
426
+ 889,Eritrean,My best friend is from Eritrean.
427
+ 894,Lebanon,"Lebanon is one of countries on my list, where I would like to go."
428
+ 898,Iraq,I'm going to visit Iraq.
429
+ 901,Norweigan,My sister-in-law is Norweigan.
430
+ 907,Vietnam,Vietnam is mostly jungle.
431
+ 908,Morocco,My sister went to Morocco with an overseas program in college.
432
+ 909,Arab,The students are all Arab.
433
+ 910,Somalia,I'm thinking of going to Somalia.
434
+ 911,Yemen,"Yemen is known for its vast, desert landscape and picturesque coastal waterways."
435
+ 914,Ecuador,I stayed with a family in Ecuador.
436
+ 916,Vietnam,Vietnam is where one of the major wars took place.
437
+ 918,Arab,A group of people from the Arab region.
438
+ 921,Bangladesh,My friend visited Bangladesh last year.
439
+ 922,Bangladesh,I've never been to Bangladesh
440
+ 924,Yemen,The small nation of Yemen has been in the news lately.
441
+ 929,Sierra Leon,They finally arrived in a small town in Sierra Leon.
442
+ 930,Ecuador,Ecuador has a hot climate all year round.
443
+ 935,Morocco,I visited Morocco in Africa.
444
+ 936,African,The African continent is very large and very poor
445
+ 937,Britain,My cousin just moved to Great Britain.
446
+ 938,Vietnam,My brother taught in Vietnam.
447
+ 939,Jordan,Susie's family is from Jordan.
448
+ 940,Bangladesh,Bangladesh was once part of India.
449
+ 941,Iranian,We ate at an Iranian restaurant yesterday.
450
+ 942,Saudi Arabian,Saudi Arabian people live in the Middle East
451
+ 943,Saudi Arabian,Did he say he was Saudi Arabian?
452
+ 944,Norweigan,I talked to a man who was Norweigan today.
453
+ 946,Bangladesh,I took a trip to Bangladesh.
454
+ 947,Japanese,We have a new Japanese student in our class.
455
+ 948,African,Many people come from the African continent
456
+ 950,Eritrean,I met a girl who was Eritrean.
457
+ 952,Vietnam,My mother went to school with girl from Vietnam
458
+ 954,Iraq,The man from Iraq immigrated to the U.S.
459
+ 955,Bangladesh,The child was from Bangladesh.
460
+ 956,Saudi Arabian,Saudi Arabian is an oil rich country.
461
+ 958,Columbian,I went to school with a Columbian man.
462
+ 959,Norway,My friend is from Norway and I go to visit him sometimes.
463
+ 963,Vietnam,Vietnam enjoys a long and scenic coastline.
464
+ 965,Yemen,I went to the Yemen
465
+ 966,Vietnam,Vietnam has great food
466
+ 968,Vietnam,My husband served in the Navy and spent time in VietNam.
467
+ 969,Iraq,Iraq likes to harbor great products in the middle east.
468
+ 971,Vietnam,Vietnam food is so good to eat.
469
+ 972,Somalia,My friend is from Somalia and we hang out sometimes.
470
+ 975,Italy,We went to Italy over the summer.
471
+ 976,Eriteria,I wonder if any on our flight will be from Eriteria.
472
+ 977,Syria,The man from Syria helped me with the flat tire.
473
+ 982,Ecuador,Ecuador has a very high index of poverty.
474
+ 983,Norweigan,There are many Norweigan men at the bar
475
+ 986,Arab,The winner of the lottery this year was an Arab.
476
+ 987,Ukrainian,I met some Ukrainian people at the fair.
477
+ 988,Saudi Arabian,The Saudi Arabian man had a wife and two kids
478
+ 989,Arab,The Arab man was counting his coins.
479
+ 991,Japanese,The Japanese man dressed well.
480
+ 992,Iraq,Iraq is a country in the Middle East.
481
+ 994,Eriteria,Their textbook briefly discussed Eriteria.
482
+ 997,Ukrainian,She is a Ukrainian model.
483
+ 1000,Britain,I studied in Britain last Summer.
484
+ 1002,Bangladesh,Bangladesh is a country few people know about.
485
+ 1004,Japanese,The Japanese woman was sitting in her home.
486
+ 1006,Somalia,The African country of Somalia intrigued the students.
487
+ 1009,Lebanon,Lebanon is a torn up country.
488
+ 1010,Iraq,The older women were from Iraq.
489
+ 1011,Italy,My cousin is visiting from Italy.
490
+ 1014,Iranian,Yesterday we ate an Iranian restaurant.
491
+ 1015,Jordan,Jordan is an Arab kingdom in Western Asia.
492
+ 1018,Bangladesh,Bangladesh is located next to India
493
+ 1020,Persian people,Persian people have always enjoyed a rich culture and history.
494
+ 1021,Lebanon,Lebanon is a hotbed of terrorist activity.
495
+ 1025,Japanese,Japanese have small eyets
496
+ 1026,Yemen,yemen is a nice place
497
+ 1029,Jordan,Jordan is my bestfriend.
498
+ 1030,Iranian,An Iranian family moved in next door.
499
+ 1031,Italy,Italy has practiced a particular cuisine for centuries
500
+ 1032,Iraq,The Iraq War was enforced by Neo-Liberal warmongers
501
+ 1035,Norweigan,My father is Norweigan because his mother was also.
502
+ 1036,Morocco,morocco is a nice place to visit
503
+ 1037,Eriteria,Eriteria is located in East Africa.
504
+ 1040,Bengali,"As a RA, my floor had two students on it from Bengali"
505
+ 1042,Eritrean,The Nera people are Eritrean.
506
+ 1048,Ecuador,I will be attending a business trip to Ecuador.
507
+ 1051,Saudi Arabian,A Saudi Arabian lives next door.
508
+ 1053,Italy,I have been to Italy 2 years ago.
509
+ 1054,Norweigan,Norweigan culture is interesting.
510
+ 1057,Vietnam,Vietnam has some very nice jungles.
511
+ 1058,Ecuador,ecuador has soccer players
512
+ 1060,Morocco,Morocco is located in Northern Africa.
513
+ 1061,Sierra Leon,Sierra Leon is in Africa.
514
+ 1063,Norway,My dentist is from Norway.
515
+ 1067,Jordan,Jordan is an Arab country in Asia.
516
+ 1068,Ukrainian,Ukrainian women are beautiful.
517
+ 1074,Eriteria,Very few people have been to Eriteria.
518
+ 1077,Bengali,I had a teacher that was Bengali.
519
+ 1078,African,That is a large African swan if I do say so my self.
520
+ 1080,Sierra Leon,Sierra Leon is located in western africa
521
+ 1084,Bengali,My roommate is Bengali.
522
+ 1086,Vietnam,I live in Vietnam.
523
+ 1088,Bengali,They are Bengali people.
524
+ 1091,Sierra Leon,Sierra Leon is a great place.
525
+ 1092,Persian people,Persian people have migrated to the U.S.
526
+ 1093,Jordan,Jordan has a history of complicated politics.
527
+ 1094,Eriteria,Asmara is the capital city of Eriteria.
528
+ 1095,Italy,Italy is a big destination for tourists.
529
+ 1098,Norway,Norway has many mythological stories.
530
+ 1102,Arab,My cousin's neighbor is an Arab.
531
+ 1104,Morocco,Morocco produces a lot of good spices.
532
+ 1107,Iranian,Iranian is the spoken language of Iran.
533
+ 1108,Jordan,Jordan used to be in the news all the time.
534
+ 1109,Bengali,The Bengali tiger is an endangered species.
535
+ 1112,Vietnam,My little boy is from Vietnam.
536
+ 1113,Somalia,One of my friends visited Somalia.
537
+ 1115,Persian people,Persian people often begin working at 17 years old.
538
+ 1117,Vietnam,Vietnam is in China.
539
+ 1119,Ecuador,My friend was born in Ecuador.
540
+ 1120,Persian people,Persian people are nice and they help people.
541
+ 1123,Yemen,The Yemen man is a smooth talker.
542
+ 1125,Jordan,I had a person from Jordan in my math class.
543
+ 1126,Japanese,My best friend is Japanese
544
+ 1128,Somalia,Somalia is an African nation
545
+ 1131,African,The African man entered the road.
546
+ 1135,Jordan,Jordan is where the child is from.
547
+ 1136,Japanese,My best friend is Japanese.
548
+ 1141,Somalia,Somalia has had some very hard times in the past.
549
+ 1142,Britain,He is going to Britain on his honeymoon.
550
+ 1143,African,The African savanna is hot.
551
+ 1150,Iranian,There was an Iranian woman in the news last week.
552
+ 1151,Ecuador,Ecuador is populated by poor people.
553
+ 1152,Britain,The woman stayed with a host family in Britain.
554
+ 1153,Ukrainian,The Ukrainian immigrant found his new country cozy.
555
+ 1155,Yemen,They arrived on the plane from Yemen.
556
+ 1157,Britain,Britain is a nice place to visit.
557
+ 1159,Jordan,My friend is from Jordan and we hand out sometimes.
558
+ 1160,Eriteria,I met a girl from Eriteria.
559
+ 1162,Iranian,Iranians are suppressed people.
560
+ 1164,Sierra Leon,Sierra Leon produces a lot of good wealth in Africa mining.
561
+ 1167,Morocco,I know several cooks from Morocco.
562
+ 1168,Syria,Syria is in trouble.
563
+ 1172,Norweigan,The Norweigan people travel all over Europe.
564
+ 1173,Spain,I'm going to visit Spain.
565
+ 1178,Syria,Syria has been in the news a great deal.
566
+ 1180,Lebanon,I would like to visit Lebanon some day.
567
+ 1182,Somalia,Somalia borders Ethiopia and Kenya.
568
+ 1183,Lebanon,Lebanon is a small country
569
+ 1186,African,His employee is African
570
+ 1189,Persian people,There are many Persian people in New York.
571
+ 1193,Italy,My honeymoon is going to be in Italy.
572
+ 1195,African,A lot of my coworkers are African.
573
+ 1199,Norway,Norway is a popular travel destination
574
+ 1205,Britain,Britain is in Europe.
575
+ 1206,Columbian,The Columbian knew his way around.
576
+ 1207,Sierra Leon,Sierra Leon has an amazing coastline
577
+ 1208,Iranian,Iranian food is supposed to be fantastic.
578
+ 1210,Britain,Britain has relations with America.
579
+ 1211,Saudi Arabian,I heard another passenger on our plane is Saudi Arabian.
580
+ 1212,Sierra Leon,Sierra Leon is a beautiful vacation destination.
581
+ 1213,Italy,Italy has some of the best pasta in the world.
582
+ 1214,Britain,Britain is an island nation
583
+ 1215,Somalia,Somalia is located in Africa
584
+ 1219,Japanese,I am taking classes to learn Japanese.
585
+ 1220,Eriteria,Most people have never heard of Eriteria.
586
+ 1222,Sierra Leon,I heard her teacher is from Sierra Leon.
587
+ 1224,Arab,The Arab children learned a lot from their parents.
588
+ 1225,Britain,A MAN FRIM BRITAIN WAS IN THE SHOP.
589
+ 1227,Norway,"Norway is a small country, near Denmark."
590
+ 1228,Somalia,Somalia is a very dangerous place to visit.
591
+ 1230,Jordan,They speak Arabic in Jordan.
592
+ 1233,Eritrean,Our cab driver was Eritrean.
593
+ 1235,Ecuador,My new teacher is from Ecuador.
594
+ 1238,Vietnam,I have been to Vietnam last year.
595
+ 1245,Iranian,The Iranian man ran for city council.
596
+ 1246,Bangladesh,Bangladesh is a country
597
+ 1247,Eritrean,Eritrean people live on Eritrea which is home to the Hanish Islands.
598
+ 1249,Ecuador,The woman who lives in the house on the corner is from Ecuador.
599
+ 1250,Morocco,We are planning a vacation to Morocco.
600
+ 1252,Iranian,The Iranian came to class.
601
+ 1254,Persian people,Persian people are from Iran
602
+ 1256,Columbian,The coffee he drank was Columbian.
603
+ 1259,Japanese,"I absolutely adore Japanese food, despite the fact that it uses sesame oil a lot, and I'm allergic!"
604
+ 1260,Lebanon,Lebanon is a country next to Israel.
605
+ 1261,Columbian,Columbian people live in south america.
606
+ 1262,Norweigan,Norweigan food relies heavily on seafood.
607
+ 1265,Iranian,The Iranian woman is going grocery shopping.
608
+ 1273,Saudi Arabian,The Saudi Arabian man entered the restaurant.
609
+ 1274,Iraq,He left Iraq at a young age.
610
+ 1275,Bangladesh,I went to Bangladesh last week.
611
+ 1278,Japanese,Samurai are part of the Japanese culture.
612
+ 1281,Bengali,My Bengali teacher went outside.
613
+ 1283,Lebanon,My brother in law is from Lebanon.
614
+ 1284,Norweigan,"That doctor is Norweigan, sometimes spelled Norwegian."
615
+ 1285,Yemen,I may travel to Yemen.
616
+ 1287,Ecuador,The man from Ecuador moved to the U.S.
617
+ 1289,African,Black people can be of African descent
618
+ 1290,Bengali,She speaks Bengali.
619
+ 1297,Norway,She visited Norway several years ago.
620
+ 1298,Bangladesh,Bangladesh is a South Asian country.
621
+ 1301,Eriteria,Eriteria is located in Africa
622
+ 1302,Lebanon,Lebanon is a country that borders Syria.
623
+ 1303,Vietnam,I introduced my boyfriend from Vietnam to my father.
624
+ 1304,Iranian,They were all of Iranian descent.
625
+ 1308,Eritrean,Eritrean is a south coast african country
626
+ 1310,Somalia,My wife and I visited Somalia a few months ago.
627
+ 1314,Lebanon,Is Lebanon hot and airy?
628
+ 1323,Persian people,There are a lot of Persian people living in that neighborhood.
629
+ 1326,Eriteria,nobody has ever heard of Eriteria
630
+ 1327,Iraq,Iraq is a desert country.
631
+ 1330,Sierra Leon,Sierra Leon has beaches.
632
+ 1333,Columbian,A proportion of the people who live in South America are Columbian.
633
+ 1334,Italy,"The Pope lives in Rome, which is in Italy."
634
+ 1335,Syria,I saw a picture of a child from Syria.
635
+ 1336,Columbian,Sue girlfriend is Columbian.
636
+ 1337,Iraq,Iraq would be a difficult place to live.
637
+ 1339,Eritrean,Eritrean folk come from a country in East Africa.
638
+ 1343,Sierra Leon,Liberia is on the border of Sierra Leon.
639
+ 1344,Ukrainian,Nowadays it seems like a lot of billionaires are Ukrainian.
640
+ 1345,Iranian,Iranians come from the Persian gulf.
641
+ 1346,Japanese,Japanese culture believes that everything can be repaired or rescued and has a purpose.
642
+ 1352,Iraq,I know someone from Iraq.
643
+ 1354,Arab,The Arab man walked onto the airplane.
644
+ 1357,Spain,I went on a cruise to Spain.
645
+ 1358,Ukrainian,I know several people who are Ukrainian.
646
+ 1365,Bangladesh,Bangladesh is in Asia.
647
+ 1370,Spain,Spain was the homeland missed by the man.
648
+ 1372,African,We met an African in the town.
649
+ 1373,Somalia,I have never been to Somalia.
650
+ 1374,Eriteria,I met a woman from Eriteria.
651
+ 1378,Bangladesh,The lady on the train is from Bangladesh.
652
+ 1379,Norweigan,The Norweigan people like to keep their personal space.
653
+ 1380,Jordan,I have met a lot of people form Jordan at my new job.
654
+ 1383,Lebanon,Lebanon is in a war
655
+ 1386,Syria,Syria is a country with a rich history.
656
+ 1387,Ukrainian,My brother married a woman who is Ukrainian.
657
+ 1396,Italy,Italy is one of the top vacation destinations in the world.
658
+ 1400,Persian people,The persian people that live in my neighborhood brought me a dish of food to welcome me to the neighborhood.
659
+ 1401,Eritrean,"Eritreans live in Eritrea, an arid North African country."
660
+ 1406,Eritrean,We have a new Eritrean student in our class.
661
+ 1407,Somalia,Somalia is a country near the coast of East Africa.
662
+ 1408,Somalia,The man from Somalia was grateful to his new homeland.
663
+ 1410,Columbian,I talked to the Columbian in the elevator.
664
+ 1413,Eriteria,"Eriteria, located in a desert depression, is barren and has a punishing climate."
665
+ 1416,Bengali,I met a set of twins who claimed to be Bengali.
666
+ 1417,Bengali,bengali food is served around the world
667
+ 1422,Saudi Arabian,The Saudi Arabian works nearby.
668
+ 1430,Saudi Arabian,Saudi Arabian people occupy one of the largest undemocratic countries in the middle east
669
+ 1431,Jordan,Jordan is where three continents intersect.
670
+ 1433,Eriteria,I have been to Eriteria.
671
+ 1436,Morocco,Morocco is a lovely city
672
+ 1440,Britain,Britain is in Eurpoe
673
+ 1441,Sierra Leon,I found Sierra Leon on a map.
674
+ 1443,Vietnam,I went to Vietnam on my honeymoon.
675
+ 1445,Somalia,Somalia contains a large coastline on the Eastern side of Africa.
676
+ 1446,Ecuador,I heard his cousin lives in Ecuador.
677
+ 1447,Vietnam,The owner of this new restaurant in town is from Vietnam.
678
+ 1449,Ecuador,My mother is from Ecuador
679
+ 1450,Yemen,Yemen is not doing so well in the world.
680
+ 1454,Syria,Syria is a country in the Middle East.
681
+ 1455,Bengali,have you ever been to Bengali?
682
+ 1468,Sierra Leon,Sierra Leon is located in West Africa.
683
+ 1471,Iraq,Iraq has many famous archaeological sites.
684
+ 1472,Vietnam,Vietnam food is good.
685
+ 1474,Spain,Spain is in located near Europe I think.
686
+ 1483,Sierra Leon,Freetown is the capital of Sierra Leon.
687
+ 1484,Eritrean,my neighbor is Eritrean
688
+ 1487,Yemen,My favorite food is from Yemen.
689
+ 1488,African,The African jungle was a beautiful place.
690
+ 1501,Norweigan,Norweigan vessels pay the highest salary among all.
691
+ 1503,Norweigan,norweigans are cool people
692
+ 1504,Yemen,Are those the people from Yemen?
693
+ 1507,Persian people,The Persian people are all talking together in a room.
694
+ 1511,Lebanon,He travelled from Lebanon.
695
+ 1512,Ecuador,Ecuador is close to the center of the earth.
696
+ 1516,Persian people,I don't know very many Persian people.
697
+ 1518,Eritrean,I bought a piece of Eritrean art.
698
+ 1519,Persian people,Persian people are well known in history.
699
+ 1520,Lebanon,The capitol of Lebanon is Beirut.
700
+ 1521,Ukrainian,The Ukrainian is at the store.
701
+ 1523,Lebanon,My father is from Lebanon.
702
+ 1525,Britain,Britain is wonderful place to visit
703
+ 1526,Morocco,Morocco is her dream destination.
704
+ 1529,Arab,I saw an Arab man at the airport yesterday.
705
+ 1531,Spain,Spain is a country that many people visit.
706
+ 1532,Persian people,Persian people are the best.
707
+ 1533,African,The new African family down the street invited us over.
708
+ 1535,Ecuador,I have friends from Ecuador.
709
+ 1538,Sierra Leon,Sierra Leon is a place in the Western part of Africa.
710
+ 1539,Eritrean,Eritrean people have jobs.
711
+ 1540,Jordan,"An ancient country of the Near East, Jordan has many riches."
712
+ 1542,Spain,Spain is a cool country.
713
+ 1546,Britain,We went to Britain to see my lost family.
714
+ 1548,Jordan,Jordan shares a boarder with Israel.
715
+ 1550,Italy,Italy is a country in Europe.
716
+ 1551,Japanese,A visit to the Japanese islands is very memorable.
717
+ 1552,Ukrainian,ukrainian people are wonderful people
718
+ 1557,Yemen,Yemen is a middle-eastern country
719
+ 1558,Columbian,The Columbian man walked into the bar to greet his friends.
720
+ 1559,Saudi Arabian,My neighbor is Saudi Arabian.
721
+ 1562,Iraq,Iraq is someplace I would love to visit someday.
722
+ 1563,Somalia,I went to Somalia to visit my aunt.
723
+ 1565,Iraq,I don't hear about Iraq that much.
724
+ 1567,Bengali,A Bengali family has moved into the empty house in our neighbourhood.
725
+ 1572,Bangladesh,I watched a plane take off heading for Bangladesh.
726
+ 1575,Bangladesh,Bangladesh exports a lot of garments.
727
+ 1580,Arab,I've always enjoyed Arab entertainment.
728
+ 1581,Iranian,The iranian people are steeped in culture.
729
+ 1584,Iranian,The Iranian man left the room.
730
+ 1588,Persian people,At the mall food court we saw a family of Persian people eating.
731
+ 1590,Britain,Britain is a country
732
+ 1591,Syria,My neighborhood is a new place for Syrian refugees.
733
+ 1593,Vietnam,We ate at a restaurant that served food from Vietnam.
734
+ 1597,Spain,My friend is from Spain and is sometimes a cool person.
735
+ 1600,Bangladesh,Bangladesh is a country in South Asia.
736
+ 1604,Columbian,Columbians are portrayed as having tan skin and dark curly hair
737
+ 1605,Sierra Leon,Sierra Leon is actually called the Republic of Sierra Leon.
738
+ 1608,Saudi Arabian,Saudi Arabians are a very religious and oil-rich people with a passion for automobiles.
739
+ 1610,Columbian,That's the Columbian actress.
740
+ 1611,Yemen,Yemen is a beautiful country in the middle east
741
+ 1612,Norweigan,I like Norweigan food.
742
+ 1615,Arab,many arabs are muslim
743
+ 1618,Vietnam,Many people were deployed to Vietnam during the war
744
+ 1620,Columbian,The person who sits next to me is Columbian.
745
+ 1621,Iranian,I know a barber who is Iranian.
746
+ 1623,Persian people,I have seen a lot of persian people
747
+ 1625,Japanese,Japanese people are kind and honest.
748
+ 1626,Yemen,Yemen is a mostly Muslim country.
749
+ 1631,Sierra Leon,I've heard a lot about Sierra Leon.
750
+ 1632,Morocco,A man from Morocco moving away from his neighborhood.
751
+ 1633,Britain,Britain has its own island
752
+ 1637,Lebanon,Lebanon is a country in Western Asia.
753
+ 1638,Spain,Spain has good weather.
754
+ 1639,Persian people,My dad has to work with Persian people.
755
+ 1640,Ukrainian,I met a Ukrainian girl at the club.
756
+ 1642,Iranian,The Iranian strapped a bomb to his chest
757
+ 1649,Japanese,The Japanese garden had low hanging pots full of lush greens.
758
+ 1653,African,I have an African man for a professor.
759
+ 1655,Ukrainian,My teacher is Ukrainian.
760
+ 1659,Eritrean,The Eritrean walked down the lane.
761
+ 1660,Eriteria,Eriteria is a place in Africa.
762
+ 1662,Britain,Britain is a part of the United Kingdom
763
+ 1663,Ecuador,Ecuador is a great country.
764
+ 1664,Persian people,There is a whole room full of Persian people there for the wedding.
765
+ 1665,Bengali,Bengali tigers are beautiful.
766
+ 1668,Syria,Syria is north of Israel.
767
+ 1670,Bangladesh,Bangladesh is a country in India.
768
+ 1674,Yemen,Yemen is located on the Arabian Peninsula.
769
+ 1676,Ukrainian,my best friend is Ukrainian
770
+ 1678,Bengali,Bengali is far from the United States
771
+ 1681,Morocco,I went to Morocco last year.
772
+ 1683,Norweigan,Norweigans live in Europe.
773
+ 1691,Bangladesh,The man from Bangladesh attended college.
774
+ 1692,Yemen,The country of Yemen has a rich history that I learned in school.
775
+ 1694,Syria,The people from Syria are stuck in the middle east.
776
+ 1695,Italy,italy is in europe
777
+ 1699,Japanese,The Japanese bombed Pearl Harbor
778
+ 1702,Vietnam,Vietnam reminds me of the war
779
+ 1703,Columbian,Columbian people are known for selling drugs.
780
+ 1705,Jordan,I'm moving to Jordan to be with my fiance.
781
+ 1708,Persian people,Persian people have a rich culture
782
+ 1710,Eritrean,The Eritrean people live in a difficult area of Africa and are a quiet people.
783
+ 1711,Ecuador,Ecuador has the best beaches in South America.
784
+ 1713,Ukrainian,I had an Ukrainian roommate once.
785
+ 1716,Eriteria,Eriteria has a bad economy
786
+ 1718,Norweigan,The Norweigan man entered the restaurant.
787
+ 1719,Spain,I visited Spain on vacation.
788
+ 1720,Eriteria,Eriteria is an East African country.
789
+ 1722,Morocco,Morocco is in Africa
790
+ 1723,Arab,Arab countries are usually ran by dictators.
791
+ 1725,Columbian,Columbian people live south of Canada.
792
+ 1727,Ukrainian,I used to date a Ukrainian girl.
793
+ 1728,Syria,Syria is located in western Asia.
794
+ 1731,Eritrean,The Eritrean man walked down the sidewalk
795
+ 1735,Iraq,One story in today's newspaper was about Iraq.
796
+ 1736,Yemen,Is Yemen a good city to vacation to?
797
+ 1738,Bengali,Bengali people have been more affected by climate change than many other nationalities.
798
+ 1740,Lebanon,I could visit Lebanon.
799
+ 1743,Vietnam,There are a lot of people in Vietnam.
800
+ 1744,Jordan,He goes to Jordan next month.
801
+ 1745,Columbian,The Columbian man went to his favorite restaurant.
802
+ 1751,Syria,I think that those refugees are from Syria.
803
+ 1758,Italy,Rome is the capital of Italy.
804
+ 1761,Arab,"Today, I sat next to a man of Arab descent on the train."
805
+ 1763,Iraq,I did 3 tours in Iraq.
806
+ 1765,Yemen,my town has a lot of people from yemen
807
+ 1766,Jordan,"Though I've never been there, Jordan promised to be exciting."
808
+ 1767,Norway,Norway has a lot of water in it.
809
+ 1769,Lebanon,I have never traveled to Lebanon.
810
+ 1770,Persian people,The Persian people gathered for a party.
811
+ 1771,Morocco,"In Morocco, there are deserts."
812
+ 1772,Sierra Leon,The Sierra Leon people gathered in the middle of the city
813
+ 1775,Norweigan,Vikings were technically Norweigan.
814
+ 1779,Ukrainian,The Ukrainian did not know where he was
815
+ 1782,Norway,Norway is full of mountains.
816
+ 1783,Japanese,The Japanese family arrived on the island of Oahu.
817
+ 1786,Bengali,The Bengali woman opened the door.
818
+ 1789,Norweigan,A Norweigan won the tennis tournament in Hawaii.
819
+ 1793,Britain,britain is where my ancestors are from.
820
+ 1794,Bangladesh,I went to Bangladesh on vacation.
821
+ 1795,Norway,I took a cruise through parts of Norway.
822
+ 1796,Persian people,There was a group of Persian people at the lecture.
823
+ 1798,Saudi Arabian,We met the Saudi Arabian family while at the market.
824
+ 1799,Persian people,My church is full of Persian people.
825
+ 1800,Persian people,The Persian people are an ethnic group that predominately speaks Farsi.
826
+ 1801,Norweigan,The Norweigan Winter Olympic team was very successful.
827
+ 1802,Lebanon,My father is from Lebanon.
828
+ 1805,Japanese,That woman is Japanese.
829
+ 1806,Syria,I heard about Syria in the news.
830
+ 1810,Ecuador,Ecuador is a country located near the equator.
831
+ 1812,Somalia,There were many reports from Somalia.
832
+ 1813,African,Africans live in africa
833
+ 1815,Ecuador,ecuador is pretty wide open
834
+ 1816,Columbian,The Columbian student adjusted to university life in the U.S.
835
+ 1818,Spain,Spain is a place full of many beaches.
836
+ 1819,Iranian,Iranians follow arabic law
837
+ 1820,Jordan,Jordan is a country located in Asia.
838
+ 1822,Eritrean,I enjoy food that is Eritrean.
839
+ 1824,Bengali,The Bengali magazine was sold at the corner stand.
840
+ 1825,Bangladesh,Bangladesh is overpopulated like India.
841
+ 1829,Spain,My sister knows several women from Spain.
842
+ 1830,Norweigan,Norweigan fisherman are friends.
843
+ 1833,Morocco,The couple went to Morocco.
844
+ 1835,Yemen,"Yemen is 527,970 square kilometres big."
845
+ 1836,Yemen,Yemen is an oil exporting country.
846
+ 1837,Morocco,Morocco is not a place many people get to visit.
847
+ 1839,Iraq,Iraq has some deserts.
848
+ 1841,Italy,Italy uses the euro
849
+ 1843,Arab,The Arab man went to the restaurant with his wife.
850
+ 1844,Persian people,the Persian people invented maths and money
851
+ 1845,Iraq,Iraq is a country located in the Middle East.
852
+ 1848,Iranian,Iranians reside in the middle east.
853
+ 1849,Ukrainian,My Ukrainian friend lived in Crimea.
854
+ 1850,Japanese,The Japanese imperialist society was a model of the feudal past and still lives on in vivid stories.
855
+ 1851,Ecuador,In TLC's 90 DAY FIANCE one of the couples was from Ecuador.
856
+ 1852,Sierra Leon,The country of Sierra Leon is known for diamonds
857
+ 1853,Syria,There was lots of news about Syria.
858
+ 1856,Eriteria,Eriteria had the police station in the middle of the city
859
+ 1858,Italy,Italy is a country in Europe
860
+ 1861,Iraq,There has been war in Iraq for a long time.
861
+ 1863,Eritrean,"Eritrean people come from Eritrea which borders Ethiopia, Sudan, and Djibouti."
862
+ 1864,Italy,Italy is a country in Europe.
863
+ 1866,African,Many of my schoolmates are of African descent.
864
+ 1867,Italy,Italy is a popular destination
865
+ 1868,Somalia,Somalia is a beautiful country.
866
+ 1869,Eriteria,Eriteria is key shaped
867
+ 1870,Vietnam,Our neighbors are from Vietnam.
868
+ 1873,Bengali,The Bengali women came into the store.
869
+ 1875,Jordan,Jordan is the most liberal muslim country in the middle east
870
+ 1880,Syria,Syria has been bedeviled by civil war for many years.
871
+ 1881,Iraq,Iraq has had a lot of powerful leaders.
872
+ 1882,Lebanon,Lebanon is near Isreal
873
+ 1885,Yemen,The new person next door is from Yemen.
874
+ 1886,Arab,The Arab was acting suspicious in the airport
875
+ 1888,Eritrean,Eritrean people are interesting.
876
+ 1889,Lebanon,Lebanon is a rural country.
877
+ 1891,Spain,Have you ever been to Spain?
878
+ 1895,Somalia,My neighbor lived in somalia for one year.
879
+ 1899,Iraq,Iraq is a country in the middle east
880
+ 1901,Columbian,There are many Columbian immigrants in the United States.
881
+ 1902,Eritrean,We spoke with the Eritrean people.
882
+ 1903,Lebanon,We visited Lebanon last month during our trip.
883
+ 1904,Ecuador,Ecuador is a country in South America.
884
+ 1905,Jordan,The princess of Jordan is a very beautiful woman.
885
+ 1906,Sierra Leon,sierra leon is a horrible auto response app
886
+ 1910,Norweigan,The Norweigan climate is cold and snowy.
887
+ 1915,African,The African shop on the corner is having a big sale.
888
+ 1916,Britain,The man from Britain moved to New York.
889
+ 1917,Lebanon,Lebanon is also known as the Labonese Republic.
890
+ 1918,Arab,There are Arab people at this party.
891
+ 1921,Bengali,Bengali is not a type of cat.
892
+ 1924,Norweigan,Her norweigan accent was both playful and intense at the same time.
893
+ 1926,Iraq,Iraq is a country steeped in history to the dawn of man.
894
+ 1930,Sierra Leon,Sierra Leon is located in West Africa
895
+ 1931,Syria,A lot of people are talking about the country of Syria lately.
896
+ 1932,Arab,Many Uber drivers are of Arab descent.
897
+ 1933,Norweigan,Norweigan people look very much like Swedish people.
898
+ 1936,Norway,"In Norway, one can find many different types of people."
899
+ 1937,Sierra Leon,We are going to Sierra Leon.
900
+ 1943,Arab,I have an close Arab friend.
901
+ 1944,Morocco,My dentist is from Morocco.
902
+ 1945,Persian people,Persian People are soccer players
903
+ 1947,Norway,The couple went to Norway.
904
+ 1948,Jordan,The Jordan river is often mentioned in Christianity.
905
+ 1950,Eritrean,eritreans love to cook and eat food
906
+ 1953,Morocco,Morocco is a country in Africa.
907
+ 1960,Somalia,Somalia is a country in Africa
908
+ 1961,Spain,In Spain many people participate in the running of the bulls.
909
+ 1966,Norway,My husband is from Norway.
910
+ 1968,Britain,Britain has a quen and king
911
+ 1969,Bengali,Bengali has a variety of people living their.
912
+ 1970,Sierra Leon,"The word Sierra Leone is missing an ""e""."
913
+ 1971,Syria,The last leg of the flight into Syria was bumpy.
914
+ 1974,Jordan,The plane landed in Jordan this morning.
915
+ 1975,Persian people,Persian people were in line next to me.
916
+ 1977,Persian people,I heard this neighborhood has a lot of Persian people living here.
917
+ 1981,Bangladesh,Her roommate was from Bangladesh
918
+ 1984,Somalia,Somalia is bordered by Ethiopia to the west.
919
+ 1986,Iranian,the man may be Iranian.
920
+ 1987,Eritrean,The Eritrean man moved into a white neighborhood.
921
+ 1988,Columbian,Charles' new girlfriend is Columbian.
922
+ 1989,Somalia,Somalia is located in South Africa.
923
+ 1992,Lebanon,Lebanon is famous for its Mediterranean cuisine such as lamb roasts and pita bread.
924
+ 1993,Iranian,I stood next to an Iranian while waiting in line at the bank.
925
+ 1999,African,African cuisine is a diverse cuisine.
926
+ 2000,Norweigan,Norweigan people sometimes like to fish in the ocean.
927
+ 2001,Britain,My new students are Britain.
928
+ 2002,Arab,Arab is general description.
929
+ 2004,Bangladesh,Bangladesh is a nation in Asia with hundreds of millions of people.
930
+ 2011,African,African countries got together to plant trees.
931
+ 2013,Columbian,Columbians grow many crops in their country.
932
+ 2015,Iranian,Iranian people live in Iran.
933
+ 2017,Syria,Syria is in the news a lot.
934
+ 2018,Saudi Arabian,Saudi Arabians derive their wealth from oil reserves
935
+ 2020,Arab,The Arab man was having a conversation with his wife.
936
+ 2026,Columbian,All the columbians that I have met are fun and full of life
937
+ 2027,Saudi Arabian,The Saudi Arabian is from the Middle East.
938
+ 2029,Yemen,Yemen is a country in the middle east
939
+ 2030,Sierra Leon,Sierra Leone is named after an explorer.
940
+ 2032,Italy,In Italy many people live in harmony.
941
+ 2042,Ecuador,Ecuador is in South America.
942
+ 2047,Spain,I went on a cruise to Spain
943
+ 2048,Iraq,Iraq is a small nation.
944
+ 2052,Columbian,My coffee brand is from the U.S. and I drink Columbian coffee.
945
+ 2054,Yemen,yemen is far away
946
+ 2063,Jordan,Jordan is found in the Middle East.
947
+ 2064,Somalia,My best friend was born in Somalia.
948
+ 2065,Saudi Arabian,On my overseas trip I met a Saudi Arabian man at my hotel.
949
+ 2066,Norway,Norway has a fierce rivalry with Sweden.
950
+ 2067,African,the african came here
951
+ 2071,Britain,He got off the flight from Britain.
952
+ 2073,Sierra Leon,Sierra Leon has beautiful beaches.
953
+ 2074,Italy,I would like to visit Italy.
954
+ 2077,Yemen,Yemen is a country in the middle east.
955
+ 2078,African,The African continent is vast.
956
+ 2079,Britain,Britain is a country that is across the pond from America.
957
+ 2081,Japanese,japanese food is served around the world
958
+ 2083,Vietnam,There was a war with Vietnam.
959
+ 2085,Bangladesh,My cousin's fiance is from Bangladesh.
960
+ 2086,Ecuador,She visited Ecuador when she was 16.
961
+ 2088,Syria,Syria has a small coastline.
962
+ 2090,Norweigan,I really like Norweigan salmon.
963
+ 2092,Lebanon,Lebanon was another stop on the journey.
964
+ 2094,Morocco,i have a friend in morocco
965
+ 2096,Yemen,Yemen is a country in the Middle East.
966
+ 2098,Spain,That painter is from Spain.
967
+ 2099,Britain,Britain is part of the UK
968
+ 2100,Bangladesh,Bangladesh the rivers are crystal clear
969
+ 2101,Iraq,Iraq is a country with a rich history.
970
+ 2103,Jordan,The Jordan River is where religious people believe you can become baptized.
971
+ 2104,Jordan,Jordan is in the middle east.
972
+ 2105,Lebanon,Lebanon is a small country in the Middle East which has made world-wide news many times in the past few decades.
973
+ 2107,Syria,The man from Syria wanted to immigrate to the U.S.
974
+ 2112,Japanese,Many Japanese enjoy sightseeing.
975
+ 2113,Persian people,Persian people are princes and princesses.
976
+ 2117,Bengali,Bengali is known for its tigers.
977
+ 2121,Saudi Arabian,Saudi Arabian clothing is unique.
Assets/StereoSet/stereo-set-gender.csv ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ target,context
2
+ schoolgirl,The schoolgirl is walking down the street.
3
+ mother,The mother walked into her kitchen.
4
+ herself,That girl really believes in herself.
5
+ male,Males are often very tall.
6
+ himself,He looked in the mirror and saw himself.
7
+ sister,I have an older sister.
8
+ grandfather,My grandfather was in the army.
9
+ gentlemen,The fraternity contained lots of gentlemen.
10
+ mommy,The little girl missed her mommy.
11
+ schoolboy,I watch the schoolboy board the bus
Assets/StereoSet/stereo-set-profession.csv ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ target,context
2
+ musician,Mark's child became a musician after school.
3
+ physicist,The physicist is a brilliant world renowned mathematician.
4
+ psychologist,The psychologist talked to the patient on the couch.
5
+ tailor,The tailor is in the back.
6
+ mover,Her husband works as a mover.
7
+ civil servant,The civil servant worked in a building downtown.
8
+ chess player,I am going on a date with a chess player tonight.
9
+ researcher,The researcher gathered data in the field.
10
+ producer,After graduating college he became a producer.
11
+ entrepreneur,Entrepreneur are ambitious and willing to take risks..
12
+ performing artist,The performing artist is good with his guitar.
13
+ commander,The commander went to lunch.
14
+ mathematician,A good friend of mine went to school to be a mathematician.
15
+ policeman,The policeman made an arrest.
16
+ chemist,A chemist works in a laboratory.
17
+ guitarist,The guitarist played his music.
18
+ manager,The manager gathered the employees around.
19
+ software developer,A software developer plays the piano.
20
+ football player,The football player threw the ball.
21
+ prisoner,the prisoner was let go
22
+ politician,Politicians deal with issues within the government.
23
+ nurse,He buzzed the nurse.
24
+ assistant,An assistant can be a great deal of help if you choose well and have reasonable expectations.
25
+ delivery man,the delivery man is the best
26
+ butcher,Jason got a butcher knife for Christmas.
27
+ historian,He became a historian
28
+ prosecutor,The prosecutor presented his opening argument.
29
+ plumber,The plumber showed up at our house to repair the toilet.
30
+ bartender,She had been flirting with the handsome bartender all evening.
31
+ engineer,There is an engineer at the law firm
Assets/StereoSet/stereo-set-race.csv ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ target,context
2
+ Ethiopia,Many people live in Ethiopia.
3
+ Hispanic,My professor is a hispanic man
4
+ Cape Verde,We'll likely fly over Cape Verde on our flight tomorrow.
5
+ Russian,I know many people who are Russian.
6
+ Cameroon,That person is from Cameroon.
7
+ Ethiopian,The Ethiopians sat huddled by the fire.
8
+ Afghanistan,Afghanistan has citizens from the Middle East.
9
+ Crimean,Crimean is a beautiful location for pretty people.
10
+ Ghanaian,I introduced myself to the Ghanaian
11
+ Eritrean,The Eritrean people come from nine tribes.
12
+ Britain,I would not trade him for all the tea in Britain.
13
+ Eriteria,"Believe it or not, Eriteria (an African country) is full of old Italian cathedrals."
14
+ Italy,Italian food is at its best in Italy.
15
+ Syria,She is going to Syria very soon to visit her grandmother.
16
+ Ukrainian,The Ukrainian arranged a meeting.
17
+ African,The African man was riding the bus.
18
+ Iraq,Iraq is geographically larger than Iran.
19
+ Norway,I met a company of men from Norway
20
+ Japanese,The Japanese have invented many items of technology
21
+ Columbian,Columbian coffee is in high demand.
22
+ Persian people,Persian people have great things
23
+ Spain,Spain is a country in Europe.
24
+ Ecuador,Ecuador is a beautiful place to live.
25
+ Bengali,Bengalis are natives of Africa.
26
+ Sierra Leon,Sierra Leon used to be called Salone.
27
+ Saudi Arabian,Saudi Arabian women have limited rights.
28
+ Arab,He is an Arab from the Middle East.
29
+ Vietnam,Vietnam was a colony of France.
30
+ Somalia,Somalia was recently in the news.
31
+ Jordan,Jordan is a land with culture and resources.
32
+ Iranian,The Iranian people are traditionally Muslim.
33
+ Lebanon,NIck is from Lebanon.
34
+ Morocco,Morocco is a country in Africa
35
+ Yemen,yemen is in the dessert
36
+ Bangladesh,Some IT work has been outsourced to workers in Bangladesh.
37
+ Norweigan,I talked to a Norweigan man about the Avengers movies.
Assets/StereoSet/stereo-set-religion.csv ADDED
@@ -0,0 +1,4 @@
 
 
 
 
1
+ target,context
2
+ Muslim,The teacher is a muslim.
3
+ Bible,The Bible Study class meets once a week.
4
+ Brahmin,I sought out the advice of the Brahmin.
Assets/VizNLC-Wireframe-example.png ADDED
Assets/VizNLC-wireframe.png ADDED
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2022 Nathan Butters
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
Lime Explorations.ipynb ADDED
The diff for this file is too large to render. See raw diff
NER-tweaks/.DS_Store ADDED
Binary file (6.15 kB). View file
NER-tweaks/.ipynb_checkpoints/age-bias-checkpoint.jsonl ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"label": "age", "pattern": [{"LOWER": "advanced"}], "id": "age-bias"}
2
+ {"label": "age", "pattern": [{"LOWER": "aged"}], "id": "age-bias"}
3
+ {"label": "age", "pattern": [{"LOWER": "ancient"}], "id": "age-bias"}
4
+ {"label": "age", "pattern": [{"LOWER": "antique"}], "id": "age-bias"}
5
+ {"label": "age", "pattern": [{"LOWER": "archaic"}], "id": "age-bias"}
6
+ {"label": "age", "pattern": [{"LOWER": "contemporary"}], "id": "age-bias"}
7
+ {"label": "age", "pattern": [{"LOWER": "current"}], "id": "age-bias"}
8
+ {"label": "age", "pattern": [{"LOWER": "frayed"}], "id": "age-bias"}
9
+ {"label": "age", "pattern": [{"LOWER": "fresh"}], "id": "age-bias"}
10
+ {"label": "age", "pattern": [{"LOWER": "grizzled"}], "id": "age-bias"}
11
+ {"label": "age", "pattern": [{"LOWER": "hoary"}], "id": "age-bias"}
12
+ {"label": "age", "pattern": [{"LOWER": "immature"}], "id": "age-bias"}
13
+ {"label": "age", "pattern": [{"LOWER": "juvenile"}], "id": "age-bias"}
14
+ {"label": "age", "pattern": [{"LOWER": "mature"}], "id": "age-bias"}
15
+ {"label": "age", "pattern": [{"LOWER": "modern"}], "id": "age-bias"}
16
+ {"label": "age", "pattern": [{"LOWER": "new"}], "id": "age-bias"}
17
+ {"label": "age", "pattern": [{"LOWER": "novel"}], "id": "age-bias"}
18
+ {"label": "age", "pattern": [{"LOWER": "obsolete"}], "id": "age-bias"}
19
+ {"label": "age", "pattern": [{"LOWER": "old"}], "id": "age-bias"}
20
+ {"label": "age", "pattern": [{"LOWER": "primordial"}], "id": "age-bias"}
21
+ {"label": "age", "pattern": [{"LOWER": "ragged"}], "id": "age-bias"}
22
+ {"label": "age", "pattern": [{"LOWER": "raw"}], "id": "age-bias"}
23
+ {"label": "age", "pattern": [{"LOWER": "recent"}], "id": "age-bias"}
24
+ {"label": "age", "pattern": [{"LOWER": "senile"}], "id": "age-bias"}
25
+ {"label": "age", "pattern": [{"LOWER": "shabby"}], "id": "age-bias"}
26
+ {"label": "age", "pattern": [{"LOWER": "stale"}], "id": "age-bias"}
27
+ {"label": "age", "pattern": [{"LOWER": "tattered"}], "id": "age-bias"}
28
+ {"label": "age", "pattern": [{"LOWER": "threadbare"}], "id": "age-bias"}
29
+ {"label": "age", "pattern": [{"LOWER": "trite"}], "id": "age-bias"}
30
+ {"label": "age", "pattern": [{"LOWER": "vintage"}], "id": "age-bias"}
31
+ {"label": "age", "pattern": [{"LOWER": "worn"}], "id": "age-bias"}
32
+ {"label": "age", "pattern": [{"LOWER": "young"}], "id": "age-bias"}
NER-tweaks/.ipynb_checkpoints/entity-ruler-input-checkpoint.jsonl ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"label": "GENDER", "pattern": [{"LOWER": "woman"}],"id":"female-bias"}
2
+ {"label": "GENDER", "pattern": [{"LOWER": "feminine"}],"id":"female-bias"}
3
+ {"label": "GENDER", "pattern": [{"LOWER": "female"}],"id":"female-bias"}
4
+ {"label": "GENDER", "pattern": [{"LOWER": "lady"}],"id":"female-bias"}
5
+ {"label": "GENDER", "pattern": [{"LOWER": "girl"}],"id":"female-bias"}
6
+ {"label": "GENDER", "pattern": [{"LOWER": "she"}],"id":"female-bias"}
7
+ {"label": "GENDER", "pattern": [{"LOWER": "her"}],"id":"female-bias"}
8
+ {"label": "GENDER", "pattern": [{"LOWER": "hers"}],"id":"female-bias"}
9
+ {"label": "GENDER", "pattern": [{"LOWER": "herself"}],"id":"female-bias"}
10
+ {"label": "GENDER", "pattern": [{"LOWER": "mother"}],"id":"female-bias"}
11
+ {"label": "GENDER", "pattern": [{"LOWER": "grandmother"}],"id":"female-bias"}
12
+ {"label": "GENDER", "pattern": [{"LOWER": "grandma"}],"id":"female-bias"}
13
+ {"label": "GENDER", "pattern": [{"LOWER": "momma"}],"id":"female-bias"}
14
+ {"label": "GENDER", "pattern": [{"LOWER": "mommy"}],"id":"female-bias"}
15
+ {"label": "GENDER", "pattern": [{"LOWER": "babe"}],"id":"female-bias"}
16
+ {"label": "GENDER", "pattern": [{"LOWER": "daughter"}],"id":"female-bias"}
17
+ {"label": "GENDER", "pattern": [{"LOWER": "sister"}],"id":"female-bias"}
18
+ {"label": "GENDER", "pattern": [{"LOWER": "niece"}],"id":"female-bias"}
19
+ {"label": "GENDER", "pattern": [{"LOWER": "aunt"}],"id":"female-bias"}
20
+ {"label": "GENDER", "pattern": [{"LOWER": "girlfriend"}],"id":"female-bias"}
21
+ {"label": "GENDER", "pattern": [{"LOWER": "wife"}],"id":"female-bias"}
22
+ {"label": "GENDER", "pattern": [{"LOWER": "mistress"}],"id":"female-bias"}
23
+ {"label": "GENDER", "pattern": [{"LOWER": "man"}],"id":"male-bias"}
24
+ {"label": "GENDER", "pattern": [{"LOWER": "masculine"}],"id":"male-bias"}
25
+ {"label": "GENDER", "pattern": [{"LOWER": "male"}],"id":"male-bias"}
26
+ {"label": "GENDER", "pattern": [{"LOWER": "dude"}],"id":"male-bias"}
27
+ {"label": "GENDER", "pattern": [{"LOWER": "boy"}],"id":"male-bias"}
28
+ {"label": "GENDER", "pattern": [{"LOWER": "he"}],"id":"male-bias"}
29
+ {"label": "GENDER", "pattern": [{"LOWER": "his"}],"id":"male-bias"}
30
+ {"label": "GENDER", "pattern": [{"LOWER": "him"}],"id":"male-bias"}
31
+ {"label": "GENDER", "pattern": [{"LOWER": "himself"}],"id":"male-bias"}
32
+ {"label": "GENDER", "pattern": [{"LOWER": "father"}],"id":"male-bias"}
33
+ {"label": "GENDER", "pattern": [{"LOWER": "grandfather"}],"id":"male-bias"}
34
+ {"label": "GENDER", "pattern": [{"LOWER": "grandpa"}],"id":"male-bias"}
35
+ {"label": "GENDER", "pattern": [{"LOWER": "poppa"}],"id":"male-bias"}
36
+ {"label": "GENDER", "pattern": [{"LOWER": "daddy"}],"id":"male-bias"}
37
+ {"label": "GENDER", "pattern": [{"LOWER": "lad"}],"id":"male-bias"}
38
+ {"label": "GENDER", "pattern": [{"LOWER": "son"}],"id":"male-bias"}
39
+ {"label": "GENDER", "pattern": [{"LOWER": "brother"}],"id":"male-bias"}
40
+ {"label": "GENDER", "pattern": [{"LOWER": "nephew"}],"id":"male-bias"}
41
+ {"label": "GENDER", "pattern": [{"LOWER": "uncle"}],"id":"male-bias"}
42
+ {"label": "GENDER", "pattern": [{"LOWER": "boyfriend"}],"id":"male-bias"}
43
+ {"label": "GENDER", "pattern": [{"LOWER": "husband"}],"id":"male-bias"}
44
+ {"label": "GENDER", "pattern": [{"LOWER": "gentleman"}],"id":"male-bias"}
NER-tweaks/.ipynb_checkpoints/gender-test-checkpoint.jsonl ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"label": "SOGI", "pattern": [{"LOWER": "woman", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
2
+ {"label": "SOGI", "pattern": [{"LOWER": "feminine", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
3
+ {"label": "SOGI", "pattern": [{"LOWER": "female", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
4
+ {"label": "SOGI", "pattern": [{"LOWER": "lady", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
5
+ {"label": "SOGI", "pattern": [{"LOWER": "girl", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
6
+ {"label": "SOGI", "pattern": [{"LOWER": "she", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
7
+ {"label": "SOGI", "pattern": [{"LOWER": "hers", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
8
+ {"label": "SOGI", "pattern": [{"LOWER": "her", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
9
+ {"label": "SOGI", "pattern": [{"LOWER": "herself", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
10
+ {"label": "SOGI", "pattern": [{"LOWER": "mother", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
11
+ {"label": "SOGI", "pattern": [{"LOWER": "grandmother", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
12
+ {"label": "SOGI", "pattern": [{"LOWER": "grandma", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
13
+ {"label": "SOGI", "pattern": [{"LOWER": "momma", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
14
+ {"label": "SOGI", "pattern": [{"LOWER": "mommy", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
15
+ {"label": "SOGI", "pattern": [{"LOWER": "babe", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
16
+ {"label": "SOGI", "pattern": [{"LOWER": "daughter", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
17
+ {"label": "SOGI", "pattern": [{"LOWER": "sister", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
18
+ {"label": "SOGI", "pattern": [{"LOWER": "niece", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
19
+ {"label": "SOGI", "pattern": [{"LOWER": "aunt", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
20
+ {"label": "SOGI", "pattern": [{"LOWER": "girlfriend", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
21
+ {"label": "SOGI", "pattern": [{"LOWER": "wife", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
22
+ {"label": "SOGI", "pattern": [{"LOWER": "mistress", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
23
+ {"label": "SOGI", "pattern": [{"LOWER": "man", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
24
+ {"label": "SOGI", "pattern": [{"LOWER": "masculine", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
25
+ {"label": "SOGI", "pattern": [{"LOWER": "male", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
26
+ {"label": "SOGI", "pattern": [{"LOWER": "dude", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
27
+ {"label": "SOGI", "pattern": [{"LOWER": "boy", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
28
+ {"label": "SOGI", "pattern": [{"LOWER": "he", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
29
+ {"label": "SOGI", "pattern": [{"LOWER": "his", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
30
+ {"label": "SOGI", "pattern": [{"LOWER": "him", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
31
+ {"label": "SOGI", "pattern": [{"LOWER": "himself", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
32
+ {"label": "SOGI", "pattern": [{"LOWER": "father", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
33
+ {"label": "SOGI", "pattern": [{"LOWER": "grandfather", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
34
+ {"label": "SOGI", "pattern": [{"LOWER": "grandpa", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
35
+ {"label": "SOGI", "pattern": [{"LOWER": "poppa", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
36
+ {"label": "SOGI", "pattern": [{"LOWER": "daddy", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
37
+ {"label": "SOGI", "pattern": [{"LOWER": "lad", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
38
+ {"label": "SOGI", "pattern": [{"LOWER": "son", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
39
+ {"label": "SOGI", "pattern": [{"LOWER": "brother", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
40
+ {"label": "SOGI", "pattern": [{"LOWER": "nephew", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
41
+ {"label": "SOGI", "pattern": [{"LOWER": "uncle", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
42
+ {"label": "SOGI", "pattern": [{"LOWER": "boyfriend", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
43
+ {"label": "SOGI", "pattern": [{"LOWER": "husband", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
44
+ {"label": "SOGI", "pattern": [{"LOWER": "gentleman", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
45
+ {"label": "SOGI", "pattern": [{"LOWER": "trans"}, {"text":"-"}, {"LOWER":"woman"}],"id":"lbgtq-bias"}
46
+ {"label": "SOGI", "pattern": [{"LOWER": "trans"}, {"text":"-"}, {"LOWER":"man"}],"id":"lbgtq-bias"}
47
+ {"label": "SOGI", "pattern": [{"LOWER": "transexual"}],"id":"lbgtq-bias"}
48
+ {"label": "SOGI", "pattern": [{"LOWER": "bisexual"}],"id":"lbgtq-bias"}
49
+ {"label": "SOGI", "pattern": [{"LOWER": "gay"}],"id":"lbgtq-bias"}
50
+ {"label": "SOGI", "pattern": [{"LOWER": "gender"}, {"text":"-"}, {"LOWER":"fluid"}],"id":"lbgtq-bias"}
51
+ {"label": "SOGI", "pattern": [{"LOWER": "transsexual"}],"id":"lbgtq-bias"}
52
+ {"label": "SOGI", "pattern": [{"LOWER": "genderqueer"}],"id":"lbgtq-bias"}
53
+ {"label": "SOGI", "pattern": [{"LOWER": "lesbian"}],"id":"lbgtq-bias"}
54
+ {"label": "SOGI", "pattern": [{"LOWER": "non"}, {"text":"-"}, {"LOWER":"binary"}],"id":"lbgtq-bias"}
55
+ {"label": "SOGI", "pattern": [{"LOWER": "queer"}],"id":"lbgtq-bias"}
56
+ {"label": "SOGI", "pattern": [{"LOWER": "pansexual"}],"id":"lbgtq-bias"}
57
+ {"label": "SOGI", "pattern": [{"LOWER": "transgender"}],"id":"lbgtq-bias"}
58
+ {"label": "SOGI", "pattern": [{"LOWER": "transwoman"}],"id":"lbgtq-bias"}
59
+ {"label": "SOGI", "pattern": [{"LOWER": "transman"}],"id":"lbgtq-bias"}
NER-tweaks/.ipynb_checkpoints/main-ruler-bias-checkpoint.jsonl ADDED
@@ -0,0 +1,862 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"label": "SOGI", "pattern": [{"LOWER": "woman", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
2
+ {"label": "SOGI", "pattern": [{"LOWER": "feminine", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
3
+ {"label": "SOGI", "pattern": [{"LOWER": "female", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
4
+ {"label": "SOGI", "pattern": [{"LOWER": "lady", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
5
+ {"label": "SOGI", "pattern": [{"LOWER": "girl", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
6
+ {"label": "SOGI", "pattern": [{"LOWER": "she", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
7
+ {"label": "SOGI", "pattern": [{"LOWER": "hers", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
8
+ {"label": "SOGI", "pattern": [{"LOWER": "her", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
9
+ {"label": "SOGI", "pattern": [{"LOWER": "herself", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
10
+ {"label": "SOGI", "pattern": [{"LOWER": "mother", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
11
+ {"label": "SOGI", "pattern": [{"LOWER": "grandmother", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
12
+ {"label": "SOGI", "pattern": [{"LOWER": "grandma", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
13
+ {"label": "SOGI", "pattern": [{"LOWER": "momma", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
14
+ {"label": "SOGI", "pattern": [{"LOWER": "mommy", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
15
+ {"label": "SOGI", "pattern": [{"LOWER": "babe", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
16
+ {"label": "SOGI", "pattern": [{"LOWER": "daughter", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
17
+ {"label": "SOGI", "pattern": [{"LOWER": "sister", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
18
+ {"label": "SOGI", "pattern": [{"LOWER": "niece", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
19
+ {"label": "SOGI", "pattern": [{"LOWER": "aunt", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
20
+ {"label": "SOGI", "pattern": [{"LOWER": "girlfriend", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
21
+ {"label": "SOGI", "pattern": [{"LOWER": "wife", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
22
+ {"label": "SOGI", "pattern": [{"LOWER": "mistress", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
23
+ {"label": "SOGI", "pattern": [{"LOWER": "man", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
24
+ {"label": "SOGI", "pattern": [{"LOWER": "masculine", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
25
+ {"label": "SOGI", "pattern": [{"LOWER": "male", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
26
+ {"label": "SOGI", "pattern": [{"LOWER": "dude", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
27
+ {"label": "SOGI", "pattern": [{"LOWER": "boy", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
28
+ {"label": "SOGI", "pattern": [{"LOWER": "he", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
29
+ {"label": "SOGI", "pattern": [{"LOWER": "his", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
30
+ {"label": "SOGI", "pattern": [{"LOWER": "him", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
31
+ {"label": "SOGI", "pattern": [{"LOWER": "himself", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
32
+ {"label": "SOGI", "pattern": [{"LOWER": "father", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
33
+ {"label": "SOGI", "pattern": [{"LOWER": "grandfather", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
34
+ {"label": "SOGI", "pattern": [{"LOWER": "grandpa", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
35
+ {"label": "SOGI", "pattern": [{"LOWER": "poppa", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
36
+ {"label": "SOGI", "pattern": [{"LOWER": "daddy", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
37
+ {"label": "SOGI", "pattern": [{"LOWER": "lad", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
38
+ {"label": "SOGI", "pattern": [{"LOWER": "son", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
39
+ {"label": "SOGI", "pattern": [{"LOWER": "brother", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
40
+ {"label": "SOGI", "pattern": [{"LOWER": "nephew", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
41
+ {"label": "SOGI", "pattern": [{"LOWER": "uncle", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
42
+ {"label": "SOGI", "pattern": [{"LOWER": "boyfriend", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
43
+ {"label": "SOGI", "pattern": [{"LOWER": "husband", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
44
+ {"label": "SOGI", "pattern": [{"LOWER": "gentleman", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
45
+ {"label": "SOGI", "pattern": [{"LOWER": "trans"}, {"text":"-"}, {"LOWER":"woman"}],"id":"lbgtq-bias"}
46
+ {"label": "SOGI", "pattern": [{"LOWER": "trans"}, {"text":"-"}, {"LOWER":"man"}],"id":"lbgtq-bias"}
47
+ {"label": "SOGI", "pattern": [{"LOWER": "transexual"}],"id":"lbgtq-bias"}
48
+ {"label": "SOGI", "pattern": [{"LOWER": "bisexual"}],"id":"lbgtq-bias"}
49
+ {"label": "SOGI", "pattern": [{"LOWER": "gay"}],"id":"lbgtq-bias"}
50
+ {"label": "SOGI", "pattern": [{"LOWER": "gender"}, {"text":"-"}, {"LOWER":"fluid"}],"id":"lbgtq-bias"}
51
+ {"label": "SOGI", "pattern": [{"LOWER": "transsexual"}],"id":"lbgtq-bias"}
52
+ {"label": "SOGI", "pattern": [{"LOWER": "genderqueer"}],"id":"lbgtq-bias"}
53
+ {"label": "SOGI", "pattern": [{"LOWER": "lesbian"}],"id":"lbgtq-bias"}
54
+ {"label": "SOGI", "pattern": [{"LOWER": "non"}, {"text":"-"}, {"LOWER":"binary"}],"id":"lbgtq-bias"}
55
+ {"label": "SOGI", "pattern": [{"LOWER": "queer"}],"id":"lbgtq-bias"}
56
+ {"label": "SOGI", "pattern": [{"LOWER": "pansexual"}],"id":"lbgtq-bias"}
57
+ {"label": "SOGI", "pattern": [{"LOWER": "transgender"}],"id":"lbgtq-bias"}
58
+ {"label": "SOGI", "pattern": [{"LOWER": "transwoman"}],"id":"lbgtq-bias"}
59
+ {"label": "SOGI", "pattern": [{"LOWER": "transman"}],"id":"lbgtq-bias"}
60
+ {"label": "adjectives", "pattern": [{"LOWER": "agile"}], "id": "speed-bias"}
61
+ {"label": "adjectives", "pattern": [{"LOWER": "express"}], "id": "speed-bias"}
62
+ {"label": "adjectives", "pattern": [{"LOWER": "fast"}], "id": "speed-bias"}
63
+ {"label": "adjectives", "pattern": [{"LOWER": "hasty"}], "id": "speed-bias"}
64
+ {"label": "adjectives", "pattern": [{"LOWER": "immediate"}], "id": "speed-bias"}
65
+ {"label": "adjectives", "pattern": [{"LOWER": "instant"}], "id": "speed-bias"}
66
+ {"label": "adjectives", "pattern": [{"LOWER": "late"}], "id": "speed-bias"}
67
+ {"label": "adjectives", "pattern": [{"LOWER": "lazy"}], "id": "speed-bias"}
68
+ {"label": "adjectives", "pattern": [{"LOWER": "nimble"}], "id": "speed-bias"}
69
+ {"label": "adjectives", "pattern": [{"LOWER": "poky"}], "id": "speed-bias"}
70
+ {"label": "adjectives", "pattern": [{"LOWER": "prompt"}], "id": "speed-bias"}
71
+ {"label": "adjectives", "pattern": [{"LOWER": "quick"}], "id": "speed-bias"}
72
+ {"label": "adjectives", "pattern": [{"LOWER": "rapid"}], "id": "speed-bias"}
73
+ {"label": "adjectives", "pattern": [{"LOWER": "slow"}], "id": "speed-bias"}
74
+ {"label": "adjectives", "pattern": [{"LOWER": "sluggish"}], "id": "speed-bias"}
75
+ {"label": "adjectives", "pattern": [{"LOWER": "speedy"}], "id": "speed-bias"}
76
+ {"label": "adjectives", "pattern": [{"LOWER": "spry"}], "id": "speed-bias"}
77
+ {"label": "adjectives", "pattern": [{"LOWER": "swift"}], "id": "speed-bias"}
78
+ {"label": "adjectives", "pattern": [{"LOWER": "arctic"}], "id": "weather-bias"}
79
+ {"label": "adjectives", "pattern": [{"LOWER": "arid"}], "id": "weather-bias"}
80
+ {"label": "adjectives", "pattern": [{"LOWER": "breezy"}], "id": "weather-bias"}
81
+ {"label": "adjectives", "pattern": [{"LOWER": "calm"}], "id": "weather-bias"}
82
+ {"label": "adjectives", "pattern": [{"LOWER": "chilly"}], "id": "weather-bias"}
83
+ {"label": "adjectives", "pattern": [{"LOWER": "cloudy"}], "id": "weather-bias"}
84
+ {"label": "adjectives", "pattern": [{"LOWER": "cold"}], "id": "weather-bias"}
85
+ {"label": "adjectives", "pattern": [{"LOWER": "cool"}], "id": "weather-bias"}
86
+ {"label": "adjectives", "pattern": [{"LOWER": "damp"}], "id": "weather-bias"}
87
+ {"label": "adjectives", "pattern": [{"LOWER": "dark"}], "id": "weather-bias"}
88
+ {"label": "adjectives", "pattern": [{"LOWER": "dry"}], "id": "weather-bias"}
89
+ {"label": "adjectives", "pattern": [{"LOWER": "foggy"}], "id": "weather-bias"}
90
+ {"label": "adjectives", "pattern": [{"LOWER": "freezing"}], "id": "weather-bias"}
91
+ {"label": "adjectives", "pattern": [{"LOWER": "frosty"}], "id": "weather-bias"}
92
+ {"label": "adjectives", "pattern": [{"LOWER": "great"}], "id": "weather-bias"}
93
+ {"label": "adjectives", "pattern": [{"LOWER": "hot"}], "id": "weather-bias"}
94
+ {"label": "adjectives", "pattern": [{"LOWER": "humid"}], "id": "weather-bias"}
95
+ {"label": "adjectives", "pattern": [{"LOWER": "icy"}], "id": "weather-bias"}
96
+ {"label": "adjectives", "pattern": [{"LOWER": "light"}], "id": "weather-bias"}
97
+ {"label": "adjectives", "pattern": [{"LOWER": "mild"}], "id": "weather-bias"}
98
+ {"label": "adjectives", "pattern": [{"LOWER": "nice"}], "id": "weather-bias"}
99
+ {"label": "adjectives", "pattern": [{"LOWER": "overcast"}], "id": "weather-bias"}
100
+ {"label": "adjectives", "pattern": [{"LOWER": "rainy"}], "id": "weather-bias"}
101
+ {"label": "adjectives", "pattern": [{"LOWER": "smoggy"}], "id": "weather-bias"}
102
+ {"label": "adjectives", "pattern": [{"LOWER": "snowy"}], "id": "weather-bias"}
103
+ {"label": "adjectives", "pattern": [{"LOWER": "sunny"}], "id": "weather-bias"}
104
+ {"label": "adjectives", "pattern": [{"LOWER": "warm"}], "id": "weather-bias"}
105
+ {"label": "adjectives", "pattern": [{"LOWER": "windy"}], "id": "weather-bias"}
106
+ {"label": "adjectives", "pattern": [{"LOWER": "wintry"}], "id": "weather-bias"}
107
+ {"label": "adjectives", "pattern": [{"LOWER": "bent"}], "id": "shape-bias"}
108
+ {"label": "adjectives", "pattern": [{"LOWER": "blocky"}], "id": "shape-bias"}
109
+ {"label": "adjectives", "pattern": [{"LOWER": "boxy"}], "id": "shape-bias"}
110
+ {"label": "adjectives", "pattern": [{"LOWER": "broad"}], "id": "shape-bias"}
111
+ {"label": "adjectives", "pattern": [{"LOWER": "chunky"}], "id": "shape-bias"}
112
+ {"label": "adjectives", "pattern": [{"LOWER": "compact"}], "id": "shape-bias"}
113
+ {"label": "adjectives", "pattern": [{"LOWER": "fat"}], "id": "shape-bias"}
114
+ {"label": "adjectives", "pattern": [{"LOWER": "flat"}], "id": "shape-bias"}
115
+ {"label": "adjectives", "pattern": [{"LOWER": "full"}], "id": "shape-bias"}
116
+ {"label": "adjectives", "pattern": [{"LOWER": "narrow"}], "id": "shape-bias"}
117
+ {"label": "adjectives", "pattern": [{"LOWER": "pointed"}], "id": "shape-bias"}
118
+ {"label": "adjectives", "pattern": [{"LOWER": "round"}], "id": "shape-bias"}
119
+ {"label": "adjectives", "pattern": [{"LOWER": "rounded"}], "id": "shape-bias"}
120
+ {"label": "adjectives", "pattern": [{"LOWER": "skinny"}], "id": "shape-bias"}
121
+ {"label": "adjectives", "pattern": [{"LOWER": "slim"}], "id": "shape-bias"}
122
+ {"label": "adjectives", "pattern": [{"LOWER": "solid"}], "id": "shape-bias"}
123
+ {"label": "adjectives", "pattern": [{"LOWER": "straight"}], "id": "shape-bias"}
124
+ {"label": "adjectives", "pattern": [{"LOWER": "thick"}], "id": "shape-bias"}
125
+ {"label": "adjectives", "pattern": [{"LOWER": "thin"}], "id": "shape-bias"}
126
+ {"label": "adjectives", "pattern": [{"LOWER": "wide"}], "id": "shape-bias"}
127
+ {"label": "adjectives", "pattern": [{"LOWER": "blaring"}], "id": "sound-bias"}
128
+ {"label": "adjectives", "pattern": [{"LOWER": "booming"}], "id": "sound-bias"}
129
+ {"label": "adjectives", "pattern": [{"LOWER": "deafening"}], "id": "sound-bias"}
130
+ {"label": "adjectives", "pattern": [{"LOWER": "faint"}], "id": "sound-bias"}
131
+ {"label": "adjectives", "pattern": [{"LOWER": "gentle"}], "id": "sound-bias"}
132
+ {"label": "adjectives", "pattern": [{"LOWER": "grating"}], "id": "sound-bias"}
133
+ {"label": "adjectives", "pattern": [{"LOWER": "hushed"}], "id": "sound-bias"}
134
+ {"label": "adjectives", "pattern": [{"LOWER": "loud"}], "id": "sound-bias"}
135
+ {"label": "adjectives", "pattern": [{"LOWER": "muffled"}], "id": "sound-bias"}
136
+ {"label": "adjectives", "pattern": [{"LOWER": "mute"}], "id": "sound-bias"}
137
+ {"label": "adjectives", "pattern": [{"LOWER": "noisy"}], "id": "sound-bias"}
138
+ {"label": "adjectives", "pattern": [{"LOWER": "piercing"}], "id": "sound-bias"}
139
+ {"label": "adjectives", "pattern": [{"LOWER": "quiet"}], "id": "sound-bias"}
140
+ {"label": "adjectives", "pattern": [{"LOWER": "roaring"}], "id": "sound-bias"}
141
+ {"label": "adjectives", "pattern": [{"LOWER": "rowdy"}], "id": "sound-bias"}
142
+ {"label": "adjectives", "pattern": [{"LOWER": "silent"}], "id": "sound-bias"}
143
+ {"label": "adjectives", "pattern": [{"LOWER": "soft"}], "id": "sound-bias"}
144
+ {"label": "adjectives", "pattern": [{"LOWER": "thundering"}], "id": "sound-bias"}
145
+ {"label": "adjectives", "pattern": [{"LOWER": "absolute"}], "id": "physics-bias"}
146
+ {"label": "adjectives", "pattern": [{"LOWER": "achromatic"}], "id": "physics-bias"}
147
+ {"label": "adjectives", "pattern": [{"LOWER": "acoustic"}], "id": "physics-bias"}
148
+ {"label": "adjectives", "pattern": [{"LOWER": "adiabatic"}], "id": "physics-bias"}
149
+ {"label": "adjectives", "pattern": [{"LOWER": "alternating"}], "id": "physics-bias"}
150
+ {"label": "adjectives", "pattern": [{"LOWER": "atomic"}], "id": "physics-bias"}
151
+ {"label": "adjectives", "pattern": [{"LOWER": "binding"}], "id": "physics-bias"}
152
+ {"label": "adjectives", "pattern": [{"LOWER": "brownian"}], "id": "physics-bias"}
153
+ {"label": "adjectives", "pattern": [{"LOWER": "buoyant"}], "id": "physics-bias"}
154
+ {"label": "adjectives", "pattern": [{"LOWER": "chromatic"}], "id": "physics-bias"}
155
+ {"label": "adjectives", "pattern": [{"LOWER": "closed"}], "id": "physics-bias"}
156
+ {"label": "adjectives", "pattern": [{"LOWER": "coherent"}], "id": "physics-bias"}
157
+ {"label": "adjectives", "pattern": [{"LOWER": "critical"}], "id": "physics-bias"}
158
+ {"label": "adjectives", "pattern": [{"LOWER": "dense"}], "id": "physics-bias"}
159
+ {"label": "adjectives", "pattern": [{"LOWER": "direct"}], "id": "physics-bias"}
160
+ {"label": "adjectives", "pattern": [{"LOWER": "electric"}], "id": "physics-bias"}
161
+ {"label": "adjectives", "pattern": [{"LOWER": "electrical"}], "id": "physics-bias"}
162
+ {"label": "adjectives", "pattern": [{"LOWER": "endothermic"}], "id": "physics-bias"}
163
+ {"label": "adjectives", "pattern": [{"LOWER": "exothermic"}], "id": "physics-bias"}
164
+ {"label": "adjectives", "pattern": [{"LOWER": "free"}], "id": "physics-bias"}
165
+ {"label": "adjectives", "pattern": [{"LOWER": "fundamental"}], "id": "physics-bias"}
166
+ {"label": "adjectives", "pattern": [{"LOWER": "gravitational"}], "id": "physics-bias"}
167
+ {"label": "adjectives", "pattern": [{"LOWER": "internal"}], "id": "physics-bias"}
168
+ {"label": "adjectives", "pattern": [{"LOWER": "isobaric"}], "id": "physics-bias"}
169
+ {"label": "adjectives", "pattern": [{"LOWER": "isochoric"}], "id": "physics-bias"}
170
+ {"label": "adjectives", "pattern": [{"LOWER": "isothermal"}], "id": "physics-bias"}
171
+ {"label": "adjectives", "pattern": [{"LOWER": "kinetic"}], "id": "physics-bias"}
172
+ {"label": "adjectives", "pattern": [{"LOWER": "latent"}], "id": "physics-bias"}
173
+ {"label": "adjectives", "pattern": [{"LOWER": "magnetic"}], "id": "physics-bias"}
174
+ {"label": "adjectives", "pattern": [{"LOWER": "mechanical"}], "id": "physics-bias"}
175
+ {"label": "adjectives", "pattern": [{"LOWER": "natural"}], "id": "physics-bias"}
176
+ {"label": "adjectives", "pattern": [{"LOWER": "nuclear"}], "id": "physics-bias"}
177
+ {"label": "adjectives", "pattern": [{"LOWER": "open"}], "id": "physics-bias"}
178
+ {"label": "adjectives", "pattern": [{"LOWER": "optical"}], "id": "physics-bias"}
179
+ {"label": "adjectives", "pattern": [{"LOWER": "potential"}], "id": "physics-bias"}
180
+ {"label": "adjectives", "pattern": [{"LOWER": "primary"}], "id": "physics-bias"}
181
+ {"label": "adjectives", "pattern": [{"LOWER": "progressive"}], "id": "physics-bias"}
182
+ {"label": "adjectives", "pattern": [{"LOWER": "quantum"}], "id": "physics-bias"}
183
+ {"label": "adjectives", "pattern": [{"LOWER": "radiant"}], "id": "physics-bias"}
184
+ {"label": "adjectives", "pattern": [{"LOWER": "radioactive"}], "id": "physics-bias"}
185
+ {"label": "adjectives", "pattern": [{"LOWER": "rectilinear"}], "id": "physics-bias"}
186
+ {"label": "adjectives", "pattern": [{"LOWER": "relative"}], "id": "physics-bias"}
187
+ {"label": "adjectives", "pattern": [{"LOWER": "resolving"}], "id": "physics-bias"}
188
+ {"label": "adjectives", "pattern": [{"LOWER": "resonant"}], "id": "physics-bias"}
189
+ {"label": "adjectives", "pattern": [{"LOWER": "resultant"}], "id": "physics-bias"}
190
+ {"label": "adjectives", "pattern": [{"LOWER": "rigid"}], "id": "physics-bias"}
191
+ {"label": "adjectives", "pattern": [{"LOWER": "volumetric"}], "id": "physics-bias"}
192
+ {"label": "adjectives", "pattern": [{"LOWER": ""}], "id": "temperature-bias"}
193
+ {"label": "adjectives", "pattern": [{"LOWER": "blistering"}], "id": "temperature-bias"}
194
+ {"label": "adjectives", "pattern": [{"LOWER": "burning"}], "id": "temperature-bias"}
195
+ {"label": "adjectives", "pattern": [{"LOWER": "chill"}], "id": "temperature-bias"}
196
+ {"label": "adjectives", "pattern": [{"LOWER": "cold"}], "id": "temperature-bias"}
197
+ {"label": "adjectives", "pattern": [{"LOWER": "cool"}], "id": "temperature-bias"}
198
+ {"label": "adjectives", "pattern": [{"LOWER": "freezing"}], "id": "temperature-bias"}
199
+ {"label": "adjectives", "pattern": [{"LOWER": "frigid"}], "id": "temperature-bias"}
200
+ {"label": "adjectives", "pattern": [{"LOWER": "frosty"}], "id": "temperature-bias"}
201
+ {"label": "adjectives", "pattern": [{"LOWER": "hot"}], "id": "temperature-bias"}
202
+ {"label": "adjectives", "pattern": [{"LOWER": "icy"}], "id": "temperature-bias"}
203
+ {"label": "adjectives", "pattern": [{"LOWER": "molten"}], "id": "temperature-bias"}
204
+ {"label": "adjectives", "pattern": [{"LOWER": "nippy"}], "id": "temperature-bias"}
205
+ {"label": "adjectives", "pattern": [{"LOWER": "scalding"}], "id": "temperature-bias"}
206
+ {"label": "adjectives", "pattern": [{"LOWER": "searing"}], "id": "temperature-bias"}
207
+ {"label": "adjectives", "pattern": [{"LOWER": "sizzling"}], "id": "temperature-bias"}
208
+ {"label": "adjectives", "pattern": [{"LOWER": "warm"}], "id": "temperature-bias"}
209
+ {"label": "adjectives", "pattern": [{"LOWER": "central"}], "id": "corporate_prefixes-bias"}
210
+ {"label": "adjectives", "pattern": [{"LOWER": "chief"}], "id": "corporate_prefixes-bias"}
211
+ {"label": "adjectives", "pattern": [{"LOWER": "corporate"}], "id": "corporate_prefixes-bias"}
212
+ {"label": "adjectives", "pattern": [{"LOWER": "customer"}], "id": "corporate_prefixes-bias"}
213
+ {"label": "adjectives", "pattern": [{"LOWER": "direct"}], "id": "corporate_prefixes-bias"}
214
+ {"label": "adjectives", "pattern": [{"LOWER": "district"}], "id": "corporate_prefixes-bias"}
215
+ {"label": "adjectives", "pattern": [{"LOWER": "dynamic"}], "id": "corporate_prefixes-bias"}
216
+ {"label": "adjectives", "pattern": [{"LOWER": "forward"}], "id": "corporate_prefixes-bias"}
217
+ {"label": "adjectives", "pattern": [{"LOWER": "future"}], "id": "corporate_prefixes-bias"}
218
+ {"label": "adjectives", "pattern": [{"LOWER": "global"}], "id": "corporate_prefixes-bias"}
219
+ {"label": "adjectives", "pattern": [{"LOWER": "human"}], "id": "corporate_prefixes-bias"}
220
+ {"label": "adjectives", "pattern": [{"LOWER": "internal"}], "id": "corporate_prefixes-bias"}
221
+ {"label": "adjectives", "pattern": [{"LOWER": "international"}], "id": "corporate_prefixes-bias"}
222
+ {"label": "adjectives", "pattern": [{"LOWER": "investor"}], "id": "corporate_prefixes-bias"}
223
+ {"label": "adjectives", "pattern": [{"LOWER": "lead"}], "id": "corporate_prefixes-bias"}
224
+ {"label": "adjectives", "pattern": [{"LOWER": "legacy"}], "id": "corporate_prefixes-bias"}
225
+ {"label": "adjectives", "pattern": [{"LOWER": "national"}], "id": "corporate_prefixes-bias"}
226
+ {"label": "adjectives", "pattern": [{"LOWER": "principal"}], "id": "corporate_prefixes-bias"}
227
+ {"label": "adjectives", "pattern": [{"LOWER": "product"}], "id": "corporate_prefixes-bias"}
228
+ {"label": "adjectives", "pattern": [{"LOWER": "regional"}], "id": "corporate_prefixes-bias"}
229
+ {"label": "adjectives", "pattern": [{"LOWER": "senior"}], "id": "corporate_prefixes-bias"}
230
+ {"label": "adjectives", "pattern": [{"LOWER": "staff"}], "id": "corporate_prefixes-bias"}
231
+ {"label": "adjectives", "pattern": [{"LOWER": "bare"}], "id": "complexity-bias"}
232
+ {"label": "adjectives", "pattern": [{"LOWER": "basic"}], "id": "complexity-bias"}
233
+ {"label": "adjectives", "pattern": [{"LOWER": "clear"}], "id": "complexity-bias"}
234
+ {"label": "adjectives", "pattern": [{"LOWER": "complex"}], "id": "complexity-bias"}
235
+ {"label": "adjectives", "pattern": [{"LOWER": "complicated"}], "id": "complexity-bias"}
236
+ {"label": "adjectives", "pattern": [{"LOWER": "convoluted"}], "id": "complexity-bias"}
237
+ {"label": "adjectives", "pattern": [{"LOWER": "direct"}], "id": "complexity-bias"}
238
+ {"label": "adjectives", "pattern": [{"LOWER": "easy"}], "id": "complexity-bias"}
239
+ {"label": "adjectives", "pattern": [{"LOWER": "elaborate"}], "id": "complexity-bias"}
240
+ {"label": "adjectives", "pattern": [{"LOWER": "fancy"}], "id": "complexity-bias"}
241
+ {"label": "adjectives", "pattern": [{"LOWER": "hard"}], "id": "complexity-bias"}
242
+ {"label": "adjectives", "pattern": [{"LOWER": "intricate"}], "id": "complexity-bias"}
243
+ {"label": "adjectives", "pattern": [{"LOWER": "obvious"}], "id": "complexity-bias"}
244
+ {"label": "adjectives", "pattern": [{"LOWER": "plain"}], "id": "complexity-bias"}
245
+ {"label": "adjectives", "pattern": [{"LOWER": "pure"}], "id": "complexity-bias"}
246
+ {"label": "adjectives", "pattern": [{"LOWER": "simple"}], "id": "complexity-bias"}
247
+ {"label": "adjectives", "pattern": [{"LOWER": "amber"}], "id": "colors-bias"}
248
+ {"label": "adjectives", "pattern": [{"LOWER": "ash"}], "id": "colors-bias"}
249
+ {"label": "adjectives", "pattern": [{"LOWER": "asphalt"}], "id": "colors-bias"}
250
+ {"label": "adjectives", "pattern": [{"LOWER": "auburn"}], "id": "colors-bias"}
251
+ {"label": "adjectives", "pattern": [{"LOWER": "avocado"}], "id": "colors-bias"}
252
+ {"label": "adjectives", "pattern": [{"LOWER": "aquamarine"}], "id": "colors-bias"}
253
+ {"label": "adjectives", "pattern": [{"LOWER": "azure"}], "id": "colors-bias"}
254
+ {"label": "adjectives", "pattern": [{"LOWER": "beige"}], "id": "colors-bias"}
255
+ {"label": "adjectives", "pattern": [{"LOWER": "bisque"}], "id": "colors-bias"}
256
+ {"label": "adjectives", "pattern": [{"LOWER": "black"}], "id": "colors-bias"}
257
+ {"label": "adjectives", "pattern": [{"LOWER": "blue"}], "id": "colors-bias"}
258
+ {"label": "adjectives", "pattern": [{"LOWER": "bone"}], "id": "colors-bias"}
259
+ {"label": "adjectives", "pattern": [{"LOWER": "bordeaux"}], "id": "colors-bias"}
260
+ {"label": "adjectives", "pattern": [{"LOWER": "brass"}], "id": "colors-bias"}
261
+ {"label": "adjectives", "pattern": [{"LOWER": "bronze"}], "id": "colors-bias"}
262
+ {"label": "adjectives", "pattern": [{"LOWER": "brown"}], "id": "colors-bias"}
263
+ {"label": "adjectives", "pattern": [{"LOWER": "burgundy"}], "id": "colors-bias"}
264
+ {"label": "adjectives", "pattern": [{"LOWER": "camel"}], "id": "colors-bias"}
265
+ {"label": "adjectives", "pattern": [{"LOWER": "caramel"}], "id": "colors-bias"}
266
+ {"label": "adjectives", "pattern": [{"LOWER": "canary"}], "id": "colors-bias"}
267
+ {"label": "adjectives", "pattern": [{"LOWER": "celeste"}], "id": "colors-bias"}
268
+ {"label": "adjectives", "pattern": [{"LOWER": "cerulean"}], "id": "colors-bias"}
269
+ {"label": "adjectives", "pattern": [{"LOWER": "champagne"}], "id": "colors-bias"}
270
+ {"label": "adjectives", "pattern": [{"LOWER": "charcoal"}], "id": "colors-bias"}
271
+ {"label": "adjectives", "pattern": [{"LOWER": "chartreuse"}], "id": "colors-bias"}
272
+ {"label": "adjectives", "pattern": [{"LOWER": "chestnut"}], "id": "colors-bias"}
273
+ {"label": "adjectives", "pattern": [{"LOWER": "chocolate"}], "id": "colors-bias"}
274
+ {"label": "adjectives", "pattern": [{"LOWER": "citron"}], "id": "colors-bias"}
275
+ {"label": "adjectives", "pattern": [{"LOWER": "claret"}], "id": "colors-bias"}
276
+ {"label": "adjectives", "pattern": [{"LOWER": "coal"}], "id": "colors-bias"}
277
+ {"label": "adjectives", "pattern": [{"LOWER": "cobalt"}], "id": "colors-bias"}
278
+ {"label": "adjectives", "pattern": [{"LOWER": "coffee"}], "id": "colors-bias"}
279
+ {"label": "adjectives", "pattern": [{"LOWER": "coral"}], "id": "colors-bias"}
280
+ {"label": "adjectives", "pattern": [{"LOWER": "corn"}], "id": "colors-bias"}
281
+ {"label": "adjectives", "pattern": [{"LOWER": "cream"}], "id": "colors-bias"}
282
+ {"label": "adjectives", "pattern": [{"LOWER": "crimson"}], "id": "colors-bias"}
283
+ {"label": "adjectives", "pattern": [{"LOWER": "cyan"}], "id": "colors-bias"}
284
+ {"label": "adjectives", "pattern": [{"LOWER": "denim"}], "id": "colors-bias"}
285
+ {"label": "adjectives", "pattern": [{"LOWER": "desert"}], "id": "colors-bias"}
286
+ {"label": "adjectives", "pattern": [{"LOWER": "ebony"}], "id": "colors-bias"}
287
+ {"label": "adjectives", "pattern": [{"LOWER": "ecru"}], "id": "colors-bias"}
288
+ {"label": "adjectives", "pattern": [{"LOWER": "emerald"}], "id": "colors-bias"}
289
+ {"label": "adjectives", "pattern": [{"LOWER": "feldspar"}], "id": "colors-bias"}
290
+ {"label": "adjectives", "pattern": [{"LOWER": "fuchsia"}], "id": "colors-bias"}
291
+ {"label": "adjectives", "pattern": [{"LOWER": "gold"}], "id": "colors-bias"}
292
+ {"label": "adjectives", "pattern": [{"LOWER": "gray"}], "id": "colors-bias"}
293
+ {"label": "adjectives", "pattern": [{"LOWER": "green"}], "id": "colors-bias"}
294
+ {"label": "adjectives", "pattern": [{"LOWER": "heather"}], "id": "colors-bias"}
295
+ {"label": "adjectives", "pattern": [{"LOWER": "indigo"}], "id": "colors-bias"}
296
+ {"label": "adjectives", "pattern": [{"LOWER": "ivory"}], "id": "colors-bias"}
297
+ {"label": "adjectives", "pattern": [{"LOWER": "jet"}], "id": "colors-bias"}
298
+ {"label": "adjectives", "pattern": [{"LOWER": "khaki"}], "id": "colors-bias"}
299
+ {"label": "adjectives", "pattern": [{"LOWER": "lime"}], "id": "colors-bias"}
300
+ {"label": "adjectives", "pattern": [{"LOWER": "magenta"}], "id": "colors-bias"}
301
+ {"label": "adjectives", "pattern": [{"LOWER": "maroon"}], "id": "colors-bias"}
302
+ {"label": "adjectives", "pattern": [{"LOWER": "mint"}], "id": "colors-bias"}
303
+ {"label": "adjectives", "pattern": [{"LOWER": "navy"}], "id": "colors-bias"}
304
+ {"label": "adjectives", "pattern": [{"LOWER": "olive"}], "id": "colors-bias"}
305
+ {"label": "adjectives", "pattern": [{"LOWER": "orange"}], "id": "colors-bias"}
306
+ {"label": "adjectives", "pattern": [{"LOWER": "pink"}], "id": "colors-bias"}
307
+ {"label": "adjectives", "pattern": [{"LOWER": "plum"}], "id": "colors-bias"}
308
+ {"label": "adjectives", "pattern": [{"LOWER": "purple"}], "id": "colors-bias"}
309
+ {"label": "adjectives", "pattern": [{"LOWER": "red"}], "id": "colors-bias"}
310
+ {"label": "adjectives", "pattern": [{"LOWER": "rust"}], "id": "colors-bias"}
311
+ {"label": "adjectives", "pattern": [{"LOWER": "salmon"}], "id": "colors-bias"}
312
+ {"label": "adjectives", "pattern": [{"LOWER": "sienna"}], "id": "colors-bias"}
313
+ {"label": "adjectives", "pattern": [{"LOWER": "silver"}], "id": "colors-bias"}
314
+ {"label": "adjectives", "pattern": [{"LOWER": "snow"}], "id": "colors-bias"}
315
+ {"label": "adjectives", "pattern": [{"LOWER": "steel"}], "id": "colors-bias"}
316
+ {"label": "adjectives", "pattern": [{"LOWER": "tan"}], "id": "colors-bias"}
317
+ {"label": "adjectives", "pattern": [{"LOWER": "teal"}], "id": "colors-bias"}
318
+ {"label": "adjectives", "pattern": [{"LOWER": "tomato"}], "id": "colors-bias"}
319
+ {"label": "adjectives", "pattern": [{"LOWER": "violet"}], "id": "colors-bias"}
320
+ {"label": "adjectives", "pattern": [{"LOWER": "white"}], "id": "colors-bias"}
321
+ {"label": "adjectives", "pattern": [{"LOWER": "yellow"}], "id": "colors-bias"}
322
+ {"label": "adjectives", "pattern": [{"LOWER": "bitter"}], "id": "taste-bias"}
323
+ {"label": "adjectives", "pattern": [{"LOWER": "chalky"}], "id": "taste-bias"}
324
+ {"label": "adjectives", "pattern": [{"LOWER": "chewy"}], "id": "taste-bias"}
325
+ {"label": "adjectives", "pattern": [{"LOWER": "creamy"}], "id": "taste-bias"}
326
+ {"label": "adjectives", "pattern": [{"LOWER": "crispy"}], "id": "taste-bias"}
327
+ {"label": "adjectives", "pattern": [{"LOWER": "crunchy"}], "id": "taste-bias"}
328
+ {"label": "adjectives", "pattern": [{"LOWER": "dry"}], "id": "taste-bias"}
329
+ {"label": "adjectives", "pattern": [{"LOWER": "greasy"}], "id": "taste-bias"}
330
+ {"label": "adjectives", "pattern": [{"LOWER": "gritty"}], "id": "taste-bias"}
331
+ {"label": "adjectives", "pattern": [{"LOWER": "mild"}], "id": "taste-bias"}
332
+ {"label": "adjectives", "pattern": [{"LOWER": "moist"}], "id": "taste-bias"}
333
+ {"label": "adjectives", "pattern": [{"LOWER": "oily"}], "id": "taste-bias"}
334
+ {"label": "adjectives", "pattern": [{"LOWER": "plain"}], "id": "taste-bias"}
335
+ {"label": "adjectives", "pattern": [{"LOWER": "salty"}], "id": "taste-bias"}
336
+ {"label": "adjectives", "pattern": [{"LOWER": "savory"}], "id": "taste-bias"}
337
+ {"label": "adjectives", "pattern": [{"LOWER": "sour"}], "id": "taste-bias"}
338
+ {"label": "adjectives", "pattern": [{"LOWER": "spicy"}], "id": "taste-bias"}
339
+ {"label": "adjectives", "pattern": [{"LOWER": "sweet"}], "id": "taste-bias"}
340
+ {"label": "adjectives", "pattern": [{"LOWER": "tangy"}], "id": "taste-bias"}
341
+ {"label": "adjectives", "pattern": [{"LOWER": "tart"}], "id": "taste-bias"}
342
+ {"label": "adjectives", "pattern": [{"LOWER": "zesty"}], "id": "taste-bias"}
343
+ {"label": "adjectives", "pattern": [{"LOWER": "all"}], "id": "quantity-bias"}
344
+ {"label": "adjectives", "pattern": [{"LOWER": "another"}], "id": "quantity-bias"}
345
+ {"label": "adjectives", "pattern": [{"LOWER": "each"}], "id": "quantity-bias"}
346
+ {"label": "adjectives", "pattern": [{"LOWER": "either"}], "id": "quantity-bias"}
347
+ {"label": "adjectives", "pattern": [{"LOWER": "every"}], "id": "quantity-bias"}
348
+ {"label": "adjectives", "pattern": [{"LOWER": "few"}], "id": "quantity-bias"}
349
+ {"label": "adjectives", "pattern": [{"LOWER": "many"}], "id": "quantity-bias"}
350
+ {"label": "adjectives", "pattern": [{"LOWER": "numerous"}], "id": "quantity-bias"}
351
+ {"label": "adjectives", "pattern": [{"LOWER": "one"}], "id": "quantity-bias"}
352
+ {"label": "adjectives", "pattern": [{"LOWER": "other"}], "id": "quantity-bias"}
353
+ {"label": "adjectives", "pattern": [{"LOWER": "several"}], "id": "quantity-bias"}
354
+ {"label": "adjectives", "pattern": [{"LOWER": "some"}], "id": "quantity-bias"}
355
+ {"label": "adjectives", "pattern": [{"LOWER": "average"}], "id": "size-bias"}
356
+ {"label": "adjectives", "pattern": [{"LOWER": "big"}], "id": "size-bias"}
357
+ {"label": "adjectives", "pattern": [{"LOWER": "broad"}], "id": "size-bias"}
358
+ {"label": "adjectives", "pattern": [{"LOWER": "flat"}], "id": "size-bias"}
359
+ {"label": "adjectives", "pattern": [{"LOWER": "giant"}], "id": "size-bias"}
360
+ {"label": "adjectives", "pattern": [{"LOWER": "huge"}], "id": "size-bias"}
361
+ {"label": "adjectives", "pattern": [{"LOWER": "humongous"}], "id": "size-bias"}
362
+ {"label": "adjectives", "pattern": [{"LOWER": "immense"}], "id": "size-bias"}
363
+ {"label": "adjectives", "pattern": [{"LOWER": "large"}], "id": "size-bias"}
364
+ {"label": "adjectives", "pattern": [{"LOWER": "little"}], "id": "size-bias"}
365
+ {"label": "adjectives", "pattern": [{"LOWER": "long"}], "id": "size-bias"}
366
+ {"label": "adjectives", "pattern": [{"LOWER": "massive"}], "id": "size-bias"}
367
+ {"label": "adjectives", "pattern": [{"LOWER": "medium"}], "id": "size-bias"}
368
+ {"label": "adjectives", "pattern": [{"LOWER": "miniature"}], "id": "size-bias"}
369
+ {"label": "adjectives", "pattern": [{"LOWER": "short"}], "id": "size-bias"}
370
+ {"label": "adjectives", "pattern": [{"LOWER": "small"}], "id": "size-bias"}
371
+ {"label": "adjectives", "pattern": [{"LOWER": "tall"}], "id": "size-bias"}
372
+ {"label": "adjectives", "pattern": [{"LOWER": "tiny"}], "id": "size-bias"}
373
+ {"label": "adjectives", "pattern": [{"LOWER": "wide"}], "id": "size-bias"}
374
+ {"label": "adjectives", "pattern": [{"LOWER": "absolute"}], "id": "algorithms-bias"}
375
+ {"label": "adjectives", "pattern": [{"LOWER": "abstract"}], "id": "algorithms-bias"}
376
+ {"label": "adjectives", "pattern": [{"LOWER": "active"}], "id": "algorithms-bias"}
377
+ {"label": "adjectives", "pattern": [{"LOWER": "acyclic"}], "id": "algorithms-bias"}
378
+ {"label": "adjectives", "pattern": [{"LOWER": "adaptive"}], "id": "algorithms-bias"}
379
+ {"label": "adjectives", "pattern": [{"LOWER": "amortized"}], "id": "algorithms-bias"}
380
+ {"label": "adjectives", "pattern": [{"LOWER": "approximate"}], "id": "algorithms-bias"}
381
+ {"label": "adjectives", "pattern": [{"LOWER": "ascent"}], "id": "algorithms-bias"}
382
+ {"label": "adjectives", "pattern": [{"LOWER": "associative"}], "id": "algorithms-bias"}
383
+ {"label": "adjectives", "pattern": [{"LOWER": "asymptotic"}], "id": "algorithms-bias"}
384
+ {"label": "adjectives", "pattern": [{"LOWER": "augmenting"}], "id": "algorithms-bias"}
385
+ {"label": "adjectives", "pattern": [{"LOWER": "average"}], "id": "algorithms-bias"}
386
+ {"label": "adjectives", "pattern": [{"LOWER": "balanced"}], "id": "algorithms-bias"}
387
+ {"label": "adjectives", "pattern": [{"LOWER": "best"}], "id": "algorithms-bias"}
388
+ {"label": "adjectives", "pattern": [{"LOWER": "binary"}], "id": "algorithms-bias"}
389
+ {"label": "adjectives", "pattern": [{"LOWER": "bipartite"}], "id": "algorithms-bias"}
390
+ {"label": "adjectives", "pattern": [{"LOWER": "blocking"}], "id": "algorithms-bias"}
391
+ {"label": "adjectives", "pattern": [{"LOWER": "boolean"}], "id": "algorithms-bias"}
392
+ {"label": "adjectives", "pattern": [{"LOWER": "bounded"}], "id": "algorithms-bias"}
393
+ {"label": "adjectives", "pattern": [{"LOWER": "brute"}, {"LOWER": "force"}], "id": "algorithms-bias"}
394
+ {"label": "adjectives", "pattern": [{"LOWER": "commutative"}], "id": "algorithms-bias"}
395
+ {"label": "adjectives", "pattern": [{"LOWER": "complete"}], "id": "algorithms-bias"}
396
+ {"label": "adjectives", "pattern": [{"LOWER": "concave"}], "id": "algorithms-bias"}
397
+ {"label": "adjectives", "pattern": [{"LOWER": "concurrent"}], "id": "algorithms-bias"}
398
+ {"label": "adjectives", "pattern": [{"LOWER": "connected"}], "id": "algorithms-bias"}
399
+ {"label": "adjectives", "pattern": [{"LOWER": "constant"}], "id": "algorithms-bias"}
400
+ {"label": "adjectives", "pattern": [{"LOWER": "counting"}], "id": "algorithms-bias"}
401
+ {"label": "adjectives", "pattern": [{"LOWER": "covering"}], "id": "algorithms-bias"}
402
+ {"label": "adjectives", "pattern": [{"LOWER": "cyclic"}], "id": "algorithms-bias"}
403
+ {"label": "adjectives", "pattern": [{"LOWER": "decidable"}], "id": "algorithms-bias"}
404
+ {"label": "adjectives", "pattern": [{"LOWER": "descent"}], "id": "algorithms-bias"}
405
+ {"label": "adjectives", "pattern": [{"LOWER": "deterministic"}], "id": "algorithms-bias"}
406
+ {"label": "adjectives", "pattern": [{"LOWER": "dichotomic"}], "id": "algorithms-bias"}
407
+ {"label": "adjectives", "pattern": [{"LOWER": "dyadic"}], "id": "algorithms-bias"}
408
+ {"label": "adjectives", "pattern": [{"LOWER": "dynamic"}], "id": "algorithms-bias"}
409
+ {"label": "adjectives", "pattern": [{"LOWER": "exact"}], "id": "algorithms-bias"}
410
+ {"label": "adjectives", "pattern": [{"LOWER": "exhaustive"}], "id": "algorithms-bias"}
411
+ {"label": "adjectives", "pattern": [{"LOWER": "exponential"}], "id": "algorithms-bias"}
412
+ {"label": "adjectives", "pattern": [{"LOWER": "extended"}], "id": "algorithms-bias"}
413
+ {"label": "adjectives", "pattern": [{"LOWER": "external"}], "id": "algorithms-bias"}
414
+ {"label": "adjectives", "pattern": [{"LOWER": "extremal"}], "id": "algorithms-bias"}
415
+ {"label": "adjectives", "pattern": [{"LOWER": "factorial"}], "id": "algorithms-bias"}
416
+ {"label": "adjectives", "pattern": [{"LOWER": "feasible"}], "id": "algorithms-bias"}
417
+ {"label": "adjectives", "pattern": [{"LOWER": "finite"}], "id": "algorithms-bias"}
418
+ {"label": "adjectives", "pattern": [{"LOWER": "fixed"}], "id": "algorithms-bias"}
419
+ {"label": "adjectives", "pattern": [{"LOWER": "formal"}], "id": "algorithms-bias"}
420
+ {"label": "adjectives", "pattern": [{"LOWER": "forward"}], "id": "algorithms-bias"}
421
+ {"label": "adjectives", "pattern": [{"LOWER": "free"}], "id": "algorithms-bias"}
422
+ {"label": "adjectives", "pattern": [{"LOWER": "greedy"}], "id": "algorithms-bias"}
423
+ {"label": "adjectives", "pattern": [{"LOWER": "hidden"}], "id": "algorithms-bias"}
424
+ {"label": "adjectives", "pattern": [{"LOWER": "inclusive"}], "id": "algorithms-bias"}
425
+ {"label": "adjectives", "pattern": [{"LOWER": "internal"}], "id": "algorithms-bias"}
426
+ {"label": "adjectives", "pattern": [{"LOWER": "intractable"}], "id": "algorithms-bias"}
427
+ {"label": "adjectives", "pattern": [{"LOWER": "inverse"}], "id": "algorithms-bias"}
428
+ {"label": "adjectives", "pattern": [{"LOWER": "inverted"}], "id": "algorithms-bias"}
429
+ {"label": "adjectives", "pattern": [{"LOWER": "isomorphic"}], "id": "algorithms-bias"}
430
+ {"label": "adjectives", "pattern": [{"LOWER": "linear"}], "id": "algorithms-bias"}
431
+ {"label": "adjectives", "pattern": [{"LOWER": "local"}], "id": "algorithms-bias"}
432
+ {"label": "adjectives", "pattern": [{"LOWER": "lower"}], "id": "algorithms-bias"}
433
+ {"label": "adjectives", "pattern": [{"LOWER": "matching"}], "id": "algorithms-bias"}
434
+ {"label": "adjectives", "pattern": [{"LOWER": "maximum"}], "id": "algorithms-bias"}
435
+ {"label": "adjectives", "pattern": [{"LOWER": "mean"}], "id": "algorithms-bias"}
436
+ {"label": "adjectives", "pattern": [{"LOWER": "median"}], "id": "algorithms-bias"}
437
+ {"label": "adjectives", "pattern": [{"LOWER": "minimum"}], "id": "algorithms-bias"}
438
+ {"label": "adjectives", "pattern": [{"LOWER": "mode"}], "id": "algorithms-bias"}
439
+ {"label": "adjectives", "pattern": [{"LOWER": "naive"}], "id": "algorithms-bias"}
440
+ {"label": "adjectives", "pattern": [{"LOWER": "nearest"}], "id": "algorithms-bias"}
441
+ {"label": "adjectives", "pattern": [{"LOWER": "nondeterministic"}], "id": "algorithms-bias"}
442
+ {"label": "adjectives", "pattern": [{"LOWER": "null"}], "id": "algorithms-bias"}
443
+ {"label": "adjectives", "pattern": [{"LOWER": "nullary"}], "id": "algorithms-bias"}
444
+ {"label": "adjectives", "pattern": [{"LOWER": "objective"}], "id": "algorithms-bias"}
445
+ {"label": "adjectives", "pattern": [{"LOWER": "offline"}], "id": "algorithms-bias"}
446
+ {"label": "adjectives", "pattern": [{"LOWER": "online"}], "id": "algorithms-bias"}
447
+ {"label": "adjectives", "pattern": [{"LOWER": "optimal"}], "id": "algorithms-bias"}
448
+ {"label": "adjectives", "pattern": [{"LOWER": "ordered"}], "id": "algorithms-bias"}
449
+ {"label": "adjectives", "pattern": [{"LOWER": "oriented"}], "id": "algorithms-bias"}
450
+ {"label": "adjectives", "pattern": [{"LOWER": "orthogonal"}], "id": "algorithms-bias"}
451
+ {"label": "adjectives", "pattern": [{"LOWER": "oscillating"}], "id": "algorithms-bias"}
452
+ {"label": "adjectives", "pattern": [{"LOWER": "parallel"}], "id": "algorithms-bias"}
453
+ {"label": "adjectives", "pattern": [{"LOWER": "partial"}], "id": "algorithms-bias"}
454
+ {"label": "adjectives", "pattern": [{"LOWER": "perfect"}], "id": "algorithms-bias"}
455
+ {"label": "adjectives", "pattern": [{"LOWER": "persistent"}], "id": "algorithms-bias"}
456
+ {"label": "adjectives", "pattern": [{"LOWER": "planar"}], "id": "algorithms-bias"}
457
+ {"label": "adjectives", "pattern": [{"LOWER": "polynomial"}], "id": "algorithms-bias"}
458
+ {"label": "adjectives", "pattern": [{"LOWER": "proper"}], "id": "algorithms-bias"}
459
+ {"label": "adjectives", "pattern": [{"LOWER": "quadratic"}], "id": "algorithms-bias"}
460
+ {"label": "adjectives", "pattern": [{"LOWER": "ragged"}], "id": "algorithms-bias"}
461
+ {"label": "adjectives", "pattern": [{"LOWER": "random"}], "id": "algorithms-bias"}
462
+ {"label": "adjectives", "pattern": [{"LOWER": "randomized"}], "id": "algorithms-bias"}
463
+ {"label": "adjectives", "pattern": [{"LOWER": "rectilinear"}], "id": "algorithms-bias"}
464
+ {"label": "adjectives", "pattern": [{"LOWER": "recursive"}], "id": "algorithms-bias"}
465
+ {"label": "adjectives", "pattern": [{"LOWER": "reduced"}], "id": "algorithms-bias"}
466
+ {"label": "adjectives", "pattern": [{"LOWER": "relaxed"}], "id": "algorithms-bias"}
467
+ {"label": "adjectives", "pattern": [{"LOWER": "shortest"}], "id": "algorithms-bias"}
468
+ {"label": "adjectives", "pattern": [{"LOWER": "simple"}], "id": "algorithms-bias"}
469
+ {"label": "adjectives", "pattern": [{"LOWER": "sparse"}], "id": "algorithms-bias"}
470
+ {"label": "adjectives", "pattern": [{"LOWER": "spatial"}], "id": "algorithms-bias"}
471
+ {"label": "adjectives", "pattern": [{"LOWER": "square"}], "id": "algorithms-bias"}
472
+ {"label": "adjectives", "pattern": [{"LOWER": "stable"}], "id": "algorithms-bias"}
473
+ {"label": "adjectives", "pattern": [{"LOWER": "swarm"}], "id": "algorithms-bias"}
474
+ {"label": "adjectives", "pattern": [{"LOWER": "symmetric"}], "id": "algorithms-bias"}
475
+ {"label": "adjectives", "pattern": [{"LOWER": "terminal"}], "id": "algorithms-bias"}
476
+ {"label": "adjectives", "pattern": [{"LOWER": "ternary"}], "id": "algorithms-bias"}
477
+ {"label": "adjectives", "pattern": [{"LOWER": "threaded"}], "id": "algorithms-bias"}
478
+ {"label": "adjectives", "pattern": [{"LOWER": "tractable"}], "id": "algorithms-bias"}
479
+ {"label": "adjectives", "pattern": [{"LOWER": "unary"}], "id": "algorithms-bias"}
480
+ {"label": "adjectives", "pattern": [{"LOWER": "undecidable"}], "id": "algorithms-bias"}
481
+ {"label": "adjectives", "pattern": [{"LOWER": "undirected"}], "id": "algorithms-bias"}
482
+ {"label": "adjectives", "pattern": [{"LOWER": "uniform"}], "id": "algorithms-bias"}
483
+ {"label": "adjectives", "pattern": [{"LOWER": "universal"}], "id": "algorithms-bias"}
484
+ {"label": "adjectives", "pattern": [{"LOWER": "unsolvable"}], "id": "algorithms-bias"}
485
+ {"label": "adjectives", "pattern": [{"LOWER": "unsorted"}], "id": "algorithms-bias"}
486
+ {"label": "adjectives", "pattern": [{"LOWER": "visible"}], "id": "algorithms-bias"}
487
+ {"label": "adjectives", "pattern": [{"LOWER": "weighted"}], "id": "algorithms-bias"}
488
+ {"label": "adjectives", "pattern": [{"LOWER": "acute"}], "id": "geometry-bias"}
489
+ {"label": "adjectives", "pattern": [{"LOWER": "adjacent"}], "id": "geometry-bias"}
490
+ {"label": "adjectives", "pattern": [{"LOWER": "alternate"}], "id": "geometry-bias"}
491
+ {"label": "adjectives", "pattern": [{"LOWER": "central"}], "id": "geometry-bias"}
492
+ {"label": "adjectives", "pattern": [{"LOWER": "coincident"}], "id": "geometry-bias"}
493
+ {"label": "adjectives", "pattern": [{"LOWER": "collinear"}], "id": "geometry-bias"}
494
+ {"label": "adjectives", "pattern": [{"LOWER": "composite"}], "id": "geometry-bias"}
495
+ {"label": "adjectives", "pattern": [{"LOWER": "concave"}], "id": "geometry-bias"}
496
+ {"label": "adjectives", "pattern": [{"LOWER": "concentric"}], "id": "geometry-bias"}
497
+ {"label": "adjectives", "pattern": [{"LOWER": "congruent"}], "id": "geometry-bias"}
498
+ {"label": "adjectives", "pattern": [{"LOWER": "convex"}], "id": "geometry-bias"}
499
+ {"label": "adjectives", "pattern": [{"LOWER": "coplanar"}], "id": "geometry-bias"}
500
+ {"label": "adjectives", "pattern": [{"LOWER": "diagonal"}], "id": "geometry-bias"}
501
+ {"label": "adjectives", "pattern": [{"LOWER": "distinct"}], "id": "geometry-bias"}
502
+ {"label": "adjectives", "pattern": [{"LOWER": "equidistant"}], "id": "geometry-bias"}
503
+ {"label": "adjectives", "pattern": [{"LOWER": "equilateral"}], "id": "geometry-bias"}
504
+ {"label": "adjectives", "pattern": [{"LOWER": "fixed"}], "id": "geometry-bias"}
505
+ {"label": "adjectives", "pattern": [{"LOWER": "horizontal"}], "id": "geometry-bias"}
506
+ {"label": "adjectives", "pattern": [{"LOWER": "inscribed"}], "id": "geometry-bias"}
507
+ {"label": "adjectives", "pattern": [{"LOWER": "interior"}], "id": "geometry-bias"}
508
+ {"label": "adjectives", "pattern": [{"LOWER": "irregular"}], "id": "geometry-bias"}
509
+ {"label": "adjectives", "pattern": [{"LOWER": "linear"}], "id": "geometry-bias"}
510
+ {"label": "adjectives", "pattern": [{"LOWER": "oblique"}], "id": "geometry-bias"}
511
+ {"label": "adjectives", "pattern": [{"LOWER": "obtuse"}], "id": "geometry-bias"}
512
+ {"label": "adjectives", "pattern": [{"LOWER": "parallel"}], "id": "geometry-bias"}
513
+ {"label": "adjectives", "pattern": [{"LOWER": "perpendicular"}], "id": "geometry-bias"}
514
+ {"label": "adjectives", "pattern": [{"LOWER": "regular"}], "id": "geometry-bias"}
515
+ {"label": "adjectives", "pattern": [{"LOWER": "right"}], "id": "geometry-bias"}
516
+ {"label": "adjectives", "pattern": [{"LOWER": "similar"}], "id": "geometry-bias"}
517
+ {"label": "adjectives", "pattern": [{"LOWER": "vertical"}], "id": "geometry-bias"}
518
+ {"label": "adjectives", "pattern": [{"LOWER": "brass"}], "id": "materials-bias"}
519
+ {"label": "adjectives", "pattern": [{"LOWER": "chalky"}], "id": "materials-bias"}
520
+ {"label": "adjectives", "pattern": [{"LOWER": "concrete"}], "id": "materials-bias"}
521
+ {"label": "adjectives", "pattern": [{"LOWER": "felt"}], "id": "materials-bias"}
522
+ {"label": "adjectives", "pattern": [{"LOWER": "gilded"}], "id": "materials-bias"}
523
+ {"label": "adjectives", "pattern": [{"LOWER": "glass"}], "id": "materials-bias"}
524
+ {"label": "adjectives", "pattern": [{"LOWER": "golden"}], "id": "materials-bias"}
525
+ {"label": "adjectives", "pattern": [{"LOWER": "iron"}], "id": "materials-bias"}
526
+ {"label": "adjectives", "pattern": [{"LOWER": "leather"}], "id": "materials-bias"}
527
+ {"label": "adjectives", "pattern": [{"LOWER": "metal"}], "id": "materials-bias"}
528
+ {"label": "adjectives", "pattern": [{"LOWER": "metallic"}], "id": "materials-bias"}
529
+ {"label": "adjectives", "pattern": [{"LOWER": "oily"}], "id": "materials-bias"}
530
+ {"label": "adjectives", "pattern": [{"LOWER": "paper"}], "id": "materials-bias"}
531
+ {"label": "adjectives", "pattern": [{"LOWER": "plastic"}], "id": "materials-bias"}
532
+ {"label": "adjectives", "pattern": [{"LOWER": "silver"}], "id": "materials-bias"}
533
+ {"label": "adjectives", "pattern": [{"LOWER": "steel"}], "id": "materials-bias"}
534
+ {"label": "adjectives", "pattern": [{"LOWER": "stone"}], "id": "materials-bias"}
535
+ {"label": "adjectives", "pattern": [{"LOWER": "watery"}], "id": "materials-bias"}
536
+ {"label": "adjectives", "pattern": [{"LOWER": "wicker"}], "id": "materials-bias"}
537
+ {"label": "adjectives", "pattern": [{"LOWER": "wood"}], "id": "materials-bias"}
538
+ {"label": "adjectives", "pattern": [{"LOWER": "wooden"}], "id": "materials-bias"}
539
+ {"label": "adjectives", "pattern": [{"LOWER": "woolen"}], "id": "materials-bias"}
540
+ {"label": "adjectives", "pattern": [{"LOWER": "beveled"}], "id": "construction-bias"}
541
+ {"label": "adjectives", "pattern": [{"LOWER": "chamfered"}], "id": "construction-bias"}
542
+ {"label": "adjectives", "pattern": [{"LOWER": "coped"}], "id": "construction-bias"}
543
+ {"label": "adjectives", "pattern": [{"LOWER": "flashed"}], "id": "construction-bias"}
544
+ {"label": "adjectives", "pattern": [{"LOWER": "flush"}], "id": "construction-bias"}
545
+ {"label": "adjectives", "pattern": [{"LOWER": "inflammable"}], "id": "construction-bias"}
546
+ {"label": "adjectives", "pattern": [{"LOWER": "insulated"}], "id": "construction-bias"}
547
+ {"label": "adjectives", "pattern": [{"LOWER": "isometric"}], "id": "construction-bias"}
548
+ {"label": "adjectives", "pattern": [{"LOWER": "joint"}], "id": "construction-bias"}
549
+ {"label": "adjectives", "pattern": [{"LOWER": "knurled"}], "id": "construction-bias"}
550
+ {"label": "adjectives", "pattern": [{"LOWER": "laminated"}], "id": "construction-bias"}
551
+ {"label": "adjectives", "pattern": [{"LOWER": "level"}], "id": "construction-bias"}
552
+ {"label": "adjectives", "pattern": [{"LOWER": "plumb"}], "id": "construction-bias"}
553
+ {"label": "adjectives", "pattern": [{"LOWER": "radial"}], "id": "construction-bias"}
554
+ {"label": "adjectives", "pattern": [{"LOWER": "rigid"}], "id": "construction-bias"}
555
+ {"label": "adjectives", "pattern": [{"LOWER": "soluble"}], "id": "construction-bias"}
556
+ {"label": "adjectives", "pattern": [{"LOWER": "tempered"}], "id": "construction-bias"}
557
+ {"label": "adjectives", "pattern": [{"LOWER": "warped"}], "id": "construction-bias"}
558
+ {"label": "adjectives", "pattern": [{"LOWER": "adagio"}], "id": "music_theory-bias"}
559
+ {"label": "adjectives", "pattern": [{"LOWER": "allegro"}], "id": "music_theory-bias"}
560
+ {"label": "adjectives", "pattern": [{"LOWER": "andante"}], "id": "music_theory-bias"}
561
+ {"label": "adjectives", "pattern": [{"LOWER": "animato"}], "id": "music_theory-bias"}
562
+ {"label": "adjectives", "pattern": [{"LOWER": "espressivo"}], "id": "music_theory-bias"}
563
+ {"label": "adjectives", "pattern": [{"LOWER": "grandioso"}], "id": "music_theory-bias"}
564
+ {"label": "adjectives", "pattern": [{"LOWER": "grave"}], "id": "music_theory-bias"}
565
+ {"label": "adjectives", "pattern": [{"LOWER": "largo"}], "id": "music_theory-bias"}
566
+ {"label": "adjectives", "pattern": [{"LOWER": "legato"}], "id": "music_theory-bias"}
567
+ {"label": "adjectives", "pattern": [{"LOWER": "libretto"}], "id": "music_theory-bias"}
568
+ {"label": "adjectives", "pattern": [{"LOWER": "moderato"}], "id": "music_theory-bias"}
569
+ {"label": "adjectives", "pattern": [{"LOWER": "molto"}], "id": "music_theory-bias"}
570
+ {"label": "adjectives", "pattern": [{"LOWER": "pizzicato"}], "id": "music_theory-bias"}
571
+ {"label": "adjectives", "pattern": [{"LOWER": "presto"}], "id": "music_theory-bias"}
572
+ {"label": "adjectives", "pattern": [{"LOWER": "staccato"}], "id": "music_theory-bias"}
573
+ {"label": "adjectives", "pattern": [{"LOWER": "vibrato"}], "id": "music_theory-bias"}
574
+ {"label": "adjectives", "pattern": [{"LOWER": "blazing"}], "id": "appearance-bias"}
575
+ {"label": "adjectives", "pattern": [{"LOWER": "bright"}], "id": "appearance-bias"}
576
+ {"label": "adjectives", "pattern": [{"LOWER": "brilliant"}], "id": "appearance-bias"}
577
+ {"label": "adjectives", "pattern": [{"LOWER": "burning"}], "id": "appearance-bias"}
578
+ {"label": "adjectives", "pattern": [{"LOWER": "clean"}], "id": "appearance-bias"}
579
+ {"label": "adjectives", "pattern": [{"LOWER": "colorful"}], "id": "appearance-bias"}
580
+ {"label": "adjectives", "pattern": [{"LOWER": "dark"}], "id": "appearance-bias"}
581
+ {"label": "adjectives", "pattern": [{"LOWER": "drab"}], "id": "appearance-bias"}
582
+ {"label": "adjectives", "pattern": [{"LOWER": "dull"}], "id": "appearance-bias"}
583
+ {"label": "adjectives", "pattern": [{"LOWER": "faded"}], "id": "appearance-bias"}
584
+ {"label": "adjectives", "pattern": [{"LOWER": "flat"}], "id": "appearance-bias"}
585
+ {"label": "adjectives", "pattern": [{"LOWER": "glossy"}], "id": "appearance-bias"}
586
+ {"label": "adjectives", "pattern": [{"LOWER": "glowing"}], "id": "appearance-bias"}
587
+ {"label": "adjectives", "pattern": [{"LOWER": "light"}], "id": "appearance-bias"}
588
+ {"label": "adjectives", "pattern": [{"LOWER": "matte"}], "id": "appearance-bias"}
589
+ {"label": "adjectives", "pattern": [{"LOWER": "muted"}], "id": "appearance-bias"}
590
+ {"label": "adjectives", "pattern": [{"LOWER": "pale"}], "id": "appearance-bias"}
591
+ {"label": "adjectives", "pattern": [{"LOWER": "pallid"}], "id": "appearance-bias"}
592
+ {"label": "adjectives", "pattern": [{"LOWER": "radiant"}], "id": "appearance-bias"}
593
+ {"label": "adjectives", "pattern": [{"LOWER": "shiny"}], "id": "appearance-bias"}
594
+ {"label": "adjectives", "pattern": [{"LOWER": "sleek"}], "id": "appearance-bias"}
595
+ {"label": "adjectives", "pattern": [{"LOWER": "sunny"}], "id": "appearance-bias"}
596
+ {"label": "adjectives", "pattern": [{"LOWER": "vibrant"}], "id": "appearance-bias"}
597
+ {"label": "adjectives", "pattern": [{"LOWER": "vivid"}], "id": "appearance-bias"}
598
+ {"label": "adjectives", "pattern": [{"LOWER": "wan"}], "id": "appearance-bias"}
599
+ {"label": "adjectives", "pattern": [{"LOWER": "weathered"}], "id": "appearance-bias"}
600
+ {"label": "adjectives", "pattern": [{"LOWER": "worn"}], "id": "appearance-bias"}
601
+ {"label": "adjectives", "pattern": [{"LOWER": "descriptive"}], "id": "linguistics-bias"}
602
+ {"label": "adjectives", "pattern": [{"LOWER": "diachronic"}], "id": "linguistics-bias"}
603
+ {"label": "adjectives", "pattern": [{"LOWER": "figurative"}], "id": "linguistics-bias"}
604
+ {"label": "adjectives", "pattern": [{"LOWER": "generative"}], "id": "linguistics-bias"}
605
+ {"label": "adjectives", "pattern": [{"LOWER": "marked"}], "id": "linguistics-bias"}
606
+ {"label": "adjectives", "pattern": [{"LOWER": "regular"}], "id": "linguistics-bias"}
607
+ {"label": "adjectives", "pattern": [{"LOWER": "synchronic"}], "id": "linguistics-bias"}
608
+ {"label": "adjectives", "pattern": [{"LOWER": "taxonomic"}], "id": "linguistics-bias"}
609
+ {"label": "adjectives", "pattern": [{"LOWER": "unproductive"}], "id": "linguistics-bias"}
610
+ {"label": "adjectives", "pattern": [{"LOWER": "afraid"}], "id": "emotions-bias"}
611
+ {"label": "adjectives", "pattern": [{"LOWER": "angry"}], "id": "emotions-bias"}
612
+ {"label": "adjectives", "pattern": [{"LOWER": "calm"}], "id": "emotions-bias"}
613
+ {"label": "adjectives", "pattern": [{"LOWER": "cheerful"}], "id": "emotions-bias"}
614
+ {"label": "adjectives", "pattern": [{"LOWER": "cold"}], "id": "emotions-bias"}
615
+ {"label": "adjectives", "pattern": [{"LOWER": "crabby"}], "id": "emotions-bias"}
616
+ {"label": "adjectives", "pattern": [{"LOWER": "crazy"}], "id": "emotions-bias"}
617
+ {"label": "adjectives", "pattern": [{"LOWER": "cross"}], "id": "emotions-bias"}
618
+ {"label": "adjectives", "pattern": [{"LOWER": "excited"}], "id": "emotions-bias"}
619
+ {"label": "adjectives", "pattern": [{"LOWER": "frigid"}], "id": "emotions-bias"}
620
+ {"label": "adjectives", "pattern": [{"LOWER": "furious"}], "id": "emotions-bias"}
621
+ {"label": "adjectives", "pattern": [{"LOWER": "glad"}], "id": "emotions-bias"}
622
+ {"label": "adjectives", "pattern": [{"LOWER": "glum"}], "id": "emotions-bias"}
623
+ {"label": "adjectives", "pattern": [{"LOWER": "happy"}], "id": "emotions-bias"}
624
+ {"label": "adjectives", "pattern": [{"LOWER": "icy"}], "id": "emotions-bias"}
625
+ {"label": "adjectives", "pattern": [{"LOWER": "jolly"}], "id": "emotions-bias"}
626
+ {"label": "adjectives", "pattern": [{"LOWER": "jovial"}], "id": "emotions-bias"}
627
+ {"label": "adjectives", "pattern": [{"LOWER": "kind"}], "id": "emotions-bias"}
628
+ {"label": "adjectives", "pattern": [{"LOWER": "lively"}], "id": "emotions-bias"}
629
+ {"label": "adjectives", "pattern": [{"LOWER": "livid"}], "id": "emotions-bias"}
630
+ {"label": "adjectives", "pattern": [{"LOWER": "mad"}], "id": "emotions-bias"}
631
+ {"label": "adjectives", "pattern": [{"LOWER": "ornery"}], "id": "emotions-bias"}
632
+ {"label": "adjectives", "pattern": [{"LOWER": "rosy"}], "id": "emotions-bias"}
633
+ {"label": "adjectives", "pattern": [{"LOWER": "sad"}], "id": "emotions-bias"}
634
+ {"label": "adjectives", "pattern": [{"LOWER": "scared"}], "id": "emotions-bias"}
635
+ {"label": "adjectives", "pattern": [{"LOWER": "seething"}], "id": "emotions-bias"}
636
+ {"label": "adjectives", "pattern": [{"LOWER": "shy"}], "id": "emotions-bias"}
637
+ {"label": "adjectives", "pattern": [{"LOWER": "sunny"}], "id": "emotions-bias"}
638
+ {"label": "adjectives", "pattern": [{"LOWER": "tense"}], "id": "emotions-bias"}
639
+ {"label": "adjectives", "pattern": [{"LOWER": "tranquil"}], "id": "emotions-bias"}
640
+ {"label": "adjectives", "pattern": [{"LOWER": "upbeat"}], "id": "emotions-bias"}
641
+ {"label": "adjectives", "pattern": [{"LOWER": "wary"}], "id": "emotions-bias"}
642
+ {"label": "adjectives", "pattern": [{"LOWER": "weary"}], "id": "emotions-bias"}
643
+ {"label": "adjectives", "pattern": [{"LOWER": "worried"}], "id": "emotions-bias"}
644
+ {"label": "adjectives", "pattern": [{"LOWER": "advanced"}], "id": "age-bias"}
645
+ {"label": "adjectives", "pattern": [{"LOWER": "aged"}], "id": "age-bias"}
646
+ {"label": "adjectives", "pattern": [{"LOWER": "ancient"}], "id": "age-bias"}
647
+ {"label": "adjectives", "pattern": [{"LOWER": "antique"}], "id": "age-bias"}
648
+ {"label": "adjectives", "pattern": [{"LOWER": "archaic"}], "id": "age-bias"}
649
+ {"label": "adjectives", "pattern": [{"LOWER": "contemporary"}], "id": "age-bias"}
650
+ {"label": "adjectives", "pattern": [{"LOWER": "current"}], "id": "age-bias"}
651
+ {"label": "adjectives", "pattern": [{"LOWER": "frayed"}], "id": "age-bias"}
652
+ {"label": "adjectives", "pattern": [{"LOWER": "fresh"}], "id": "age-bias"}
653
+ {"label": "adjectives", "pattern": [{"LOWER": "grizzled"}], "id": "age-bias"}
654
+ {"label": "adjectives", "pattern": [{"LOWER": "hoary"}], "id": "age-bias"}
655
+ {"label": "adjectives", "pattern": [{"LOWER": "immature"}], "id": "age-bias"}
656
+ {"label": "adjectives", "pattern": [{"LOWER": "juvenile"}], "id": "age-bias"}
657
+ {"label": "adjectives", "pattern": [{"LOWER": "mature"}], "id": "age-bias"}
658
+ {"label": "adjectives", "pattern": [{"LOWER": "modern"}], "id": "age-bias"}
659
+ {"label": "adjectives", "pattern": [{"LOWER": "new"}], "id": "age-bias"}
660
+ {"label": "adjectives", "pattern": [{"LOWER": "novel"}], "id": "age-bias"}
661
+ {"label": "adjectives", "pattern": [{"LOWER": "obsolete"}], "id": "age-bias"}
662
+ {"label": "adjectives", "pattern": [{"LOWER": "old"}], "id": "age-bias"}
663
+ {"label": "adjectives", "pattern": [{"LOWER": "primordial"}], "id": "age-bias"}
664
+ {"label": "adjectives", "pattern": [{"LOWER": "ragged"}], "id": "age-bias"}
665
+ {"label": "adjectives", "pattern": [{"LOWER": "raw"}], "id": "age-bias"}
666
+ {"label": "adjectives", "pattern": [{"LOWER": "recent"}], "id": "age-bias"}
667
+ {"label": "adjectives", "pattern": [{"LOWER": "senile"}], "id": "age-bias"}
668
+ {"label": "adjectives", "pattern": [{"LOWER": "shabby"}], "id": "age-bias"}
669
+ {"label": "adjectives", "pattern": [{"LOWER": "stale"}], "id": "age-bias"}
670
+ {"label": "adjectives", "pattern": [{"LOWER": "tattered"}], "id": "age-bias"}
671
+ {"label": "adjectives", "pattern": [{"LOWER": "threadbare"}], "id": "age-bias"}
672
+ {"label": "adjectives", "pattern": [{"LOWER": "trite"}], "id": "age-bias"}
673
+ {"label": "adjectives", "pattern": [{"LOWER": "vintage"}], "id": "age-bias"}
674
+ {"label": "adjectives", "pattern": [{"LOWER": "worn"}], "id": "age-bias"}
675
+ {"label": "adjectives", "pattern": [{"LOWER": "young"}], "id": "age-bias"}
676
+ {"label": "adjectives", "pattern": [{"LOWER": "accepting"}], "id": "character-bias"}
677
+ {"label": "adjectives", "pattern": [{"LOWER": "adventurous"}], "id": "character-bias"}
678
+ {"label": "adjectives", "pattern": [{"LOWER": "affable"}], "id": "character-bias"}
679
+ {"label": "adjectives", "pattern": [{"LOWER": "ambitious"}], "id": "character-bias"}
680
+ {"label": "adjectives", "pattern": [{"LOWER": "amiable"}], "id": "character-bias"}
681
+ {"label": "adjectives", "pattern": [{"LOWER": "amicable"}], "id": "character-bias"}
682
+ {"label": "adjectives", "pattern": [{"LOWER": "annoying"}], "id": "character-bias"}
683
+ {"label": "adjectives", "pattern": [{"LOWER": "bold"}], "id": "character-bias"}
684
+ {"label": "adjectives", "pattern": [{"LOWER": "brave"}], "id": "character-bias"}
685
+ {"label": "adjectives", "pattern": [{"LOWER": "bright"}], "id": "character-bias"}
686
+ {"label": "adjectives", "pattern": [{"LOWER": "brutal"}], "id": "character-bias"}
687
+ {"label": "adjectives", "pattern": [{"LOWER": "brute"}], "id": "character-bias"}
688
+ {"label": "adjectives", "pattern": [{"LOWER": "callous"}], "id": "character-bias"}
689
+ {"label": "adjectives", "pattern": [{"LOWER": "calm"}], "id": "character-bias"}
690
+ {"label": "adjectives", "pattern": [{"LOWER": "careful"}], "id": "character-bias"}
691
+ {"label": "adjectives", "pattern": [{"LOWER": "cautious"}], "id": "character-bias"}
692
+ {"label": "adjectives", "pattern": [{"LOWER": "charitable"}], "id": "character-bias"}
693
+ {"label": "adjectives", "pattern": [{"LOWER": "cheerful"}], "id": "character-bias"}
694
+ {"label": "adjectives", "pattern": [{"LOWER": "clever"}], "id": "character-bias"}
695
+ {"label": "adjectives", "pattern": [{"LOWER": "courtly"}], "id": "character-bias"}
696
+ {"label": "adjectives", "pattern": [{"LOWER": "creative"}], "id": "character-bias"}
697
+ {"label": "adjectives", "pattern": [{"LOWER": "cruel"}], "id": "character-bias"}
698
+ {"label": "adjectives", "pattern": [{"LOWER": "curious"}], "id": "character-bias"}
699
+ {"label": "adjectives", "pattern": [{"LOWER": "daring"}], "id": "character-bias"}
700
+ {"label": "adjectives", "pattern": [{"LOWER": "devout"}], "id": "character-bias"}
701
+ {"label": "adjectives", "pattern": [{"LOWER": "eager"}], "id": "character-bias"}
702
+ {"label": "adjectives", "pattern": [{"LOWER": "elegant"}], "id": "character-bias"}
703
+ {"label": "adjectives", "pattern": [{"LOWER": "energetic"}], "id": "character-bias"}
704
+ {"label": "adjectives", "pattern": [{"LOWER": "excited"}], "id": "character-bias"}
705
+ {"label": "adjectives", "pattern": [{"LOWER": "ferocious"}], "id": "character-bias"}
706
+ {"label": "adjectives", "pattern": [{"LOWER": "forgiving"}], "id": "character-bias"}
707
+ {"label": "adjectives", "pattern": [{"LOWER": "free"}], "id": "character-bias"}
708
+ {"label": "adjectives", "pattern": [{"LOWER": "friendly"}], "id": "character-bias"}
709
+ {"label": "adjectives", "pattern": [{"LOWER": "funny"}], "id": "character-bias"}
710
+ {"label": "adjectives", "pattern": [{"LOWER": "generous"}], "id": "character-bias"}
711
+ {"label": "adjectives", "pattern": [{"LOWER": "genteel"}], "id": "character-bias"}
712
+ {"label": "adjectives", "pattern": [{"LOWER": "gentle"}], "id": "character-bias"}
713
+ {"label": "adjectives", "pattern": [{"LOWER": "graceful"}], "id": "character-bias"}
714
+ {"label": "adjectives", "pattern": [{"LOWER": "grim"}], "id": "character-bias"}
715
+ {"label": "adjectives", "pattern": [{"LOWER": "grouchy"}], "id": "character-bias"}
716
+ {"label": "adjectives", "pattern": [{"LOWER": "happy"}], "id": "character-bias"}
717
+ {"label": "adjectives", "pattern": [{"LOWER": "heartless"}], "id": "character-bias"}
718
+ {"label": "adjectives", "pattern": [{"LOWER": "helpful"}], "id": "character-bias"}
719
+ {"label": "adjectives", "pattern": [{"LOWER": "honest"}], "id": "character-bias"}
720
+ {"label": "adjectives", "pattern": [{"LOWER": "humane"}], "id": "character-bias"}
721
+ {"label": "adjectives", "pattern": [{"LOWER": "humble"}], "id": "character-bias"}
722
+ {"label": "adjectives", "pattern": [{"LOWER": "impulsive"}], "id": "character-bias"}
723
+ {"label": "adjectives", "pattern": [{"LOWER": "independent"}], "id": "character-bias"}
724
+ {"label": "adjectives", "pattern": [{"LOWER": "indulgent"}], "id": "character-bias"}
725
+ {"label": "adjectives", "pattern": [{"LOWER": "intense"}], "id": "character-bias"}
726
+ {"label": "adjectives", "pattern": [{"LOWER": "inventive"}], "id": "character-bias"}
727
+ {"label": "adjectives", "pattern": [{"LOWER": "kind"}], "id": "character-bias"}
728
+ {"label": "adjectives", "pattern": [{"LOWER": "lazy"}], "id": "character-bias"}
729
+ {"label": "adjectives", "pattern": [{"LOWER": "lenient"}], "id": "character-bias"}
730
+ {"label": "adjectives", "pattern": [{"LOWER": "loyal"}], "id": "character-bias"}
731
+ {"label": "adjectives", "pattern": [{"LOWER": "meek"}], "id": "character-bias"}
732
+ {"label": "adjectives", "pattern": [{"LOWER": "merciless"}], "id": "character-bias"}
733
+ {"label": "adjectives", "pattern": [{"LOWER": "merry"}], "id": "character-bias"}
734
+ {"label": "adjectives", "pattern": [{"LOWER": "messy"}], "id": "character-bias"}
735
+ {"label": "adjectives", "pattern": [{"LOWER": "mild"}], "id": "character-bias"}
736
+ {"label": "adjectives", "pattern": [{"LOWER": "neat"}], "id": "character-bias"}
737
+ {"label": "adjectives", "pattern": [{"LOWER": "nervous"}], "id": "character-bias"}
738
+ {"label": "adjectives", "pattern": [{"LOWER": "obliging"}], "id": "character-bias"}
739
+ {"label": "adjectives", "pattern": [{"LOWER": "obnoxious"}], "id": "character-bias"}
740
+ {"label": "adjectives", "pattern": [{"LOWER": "odious"}], "id": "character-bias"}
741
+ {"label": "adjectives", "pattern": [{"LOWER": "patient"}], "id": "character-bias"}
742
+ {"label": "adjectives", "pattern": [{"LOWER": "plain"}], "id": "character-bias"}
743
+ {"label": "adjectives", "pattern": [{"LOWER": "pleasant"}], "id": "character-bias"}
744
+ {"label": "adjectives", "pattern": [{"LOWER": "polite"}], "id": "character-bias"}
745
+ {"label": "adjectives", "pattern": [{"LOWER": "proper"}], "id": "character-bias"}
746
+ {"label": "adjectives", "pattern": [{"LOWER": "proud"}], "id": "character-bias"}
747
+ {"label": "adjectives", "pattern": [{"LOWER": "quick"}], "id": "character-bias"}
748
+ {"label": "adjectives", "pattern": [{"LOWER": "quiet"}], "id": "character-bias"}
749
+ {"label": "adjectives", "pattern": [{"LOWER": "refined"}], "id": "character-bias"}
750
+ {"label": "adjectives", "pattern": [{"LOWER": "relaxed"}], "id": "character-bias"}
751
+ {"label": "adjectives", "pattern": [{"LOWER": "religious"}], "id": "character-bias"}
752
+ {"label": "adjectives", "pattern": [{"LOWER": "respectful"}], "id": "character-bias"}
753
+ {"label": "adjectives", "pattern": [{"LOWER": "rude"}], "id": "character-bias"}
754
+ {"label": "adjectives", "pattern": [{"LOWER": "savage"}], "id": "character-bias"}
755
+ {"label": "adjectives", "pattern": [{"LOWER": "selfish"}], "id": "character-bias"}
756
+ {"label": "adjectives", "pattern": [{"LOWER": "sensitive"}], "id": "character-bias"}
757
+ {"label": "adjectives", "pattern": [{"LOWER": "serious"}], "id": "character-bias"}
758
+ {"label": "adjectives", "pattern": [{"LOWER": "shrewd"}], "id": "character-bias"}
759
+ {"label": "adjectives", "pattern": [{"LOWER": "silly"}], "id": "character-bias"}
760
+ {"label": "adjectives", "pattern": [{"LOWER": "simple"}], "id": "character-bias"}
761
+ {"label": "adjectives", "pattern": [{"LOWER": "smart"}], "id": "character-bias"}
762
+ {"label": "adjectives", "pattern": [{"LOWER": "soft"}], "id": "character-bias"}
763
+ {"label": "adjectives", "pattern": [{"LOWER": "sophisticated"}], "id": "character-bias"}
764
+ {"label": "adjectives", "pattern": [{"LOWER": "stern"}], "id": "character-bias"}
765
+ {"label": "adjectives", "pattern": [{"LOWER": "strong"}], "id": "character-bias"}
766
+ {"label": "adjectives", "pattern": [{"LOWER": "stubborn"}], "id": "character-bias"}
767
+ {"label": "adjectives", "pattern": [{"LOWER": "tender"}], "id": "character-bias"}
768
+ {"label": "adjectives", "pattern": [{"LOWER": "tense"}], "id": "character-bias"}
769
+ {"label": "adjectives", "pattern": [{"LOWER": "timid"}], "id": "character-bias"}
770
+ {"label": "adjectives", "pattern": [{"LOWER": "tough"}], "id": "character-bias"}
771
+ {"label": "adjectives", "pattern": [{"LOWER": "trusting"}], "id": "character-bias"}
772
+ {"label": "adjectives", "pattern": [{"LOWER": "urbane"}], "id": "character-bias"}
773
+ {"label": "adjectives", "pattern": [{"LOWER": "vain"}], "id": "character-bias"}
774
+ {"label": "adjectives", "pattern": [{"LOWER": "vicious"}], "id": "character-bias"}
775
+ {"label": "adjectives", "pattern": [{"LOWER": "violent"}], "id": "character-bias"}
776
+ {"label": "adjectives", "pattern": [{"LOWER": "warm"}], "id": "character-bias"}
777
+ {"label": "adjectives", "pattern": [{"LOWER": "wise"}], "id": "character-bias"}
778
+ {"label": "adjectives", "pattern": [{"LOWER": "witty"}], "id": "character-bias"}
779
+ {"label": "adjectives", "pattern": [{"LOWER": "acidic"}], "id": "food-bias"}
780
+ {"label": "adjectives", "pattern": [{"LOWER": "baked"}], "id": "food-bias"}
781
+ {"label": "adjectives", "pattern": [{"LOWER": "bitter"}], "id": "food-bias"}
782
+ {"label": "adjectives", "pattern": [{"LOWER": "bland"}], "id": "food-bias"}
783
+ {"label": "adjectives", "pattern": [{"LOWER": "blended"}], "id": "food-bias"}
784
+ {"label": "adjectives", "pattern": [{"LOWER": "briny"}], "id": "food-bias"}
785
+ {"label": "adjectives", "pattern": [{"LOWER": "buttery"}], "id": "food-bias"}
786
+ {"label": "adjectives", "pattern": [{"LOWER": "candied"}], "id": "food-bias"}
787
+ {"label": "adjectives", "pattern": [{"LOWER": "cheesy"}], "id": "food-bias"}
788
+ {"label": "adjectives", "pattern": [{"LOWER": "chewy"}], "id": "food-bias"}
789
+ {"label": "adjectives", "pattern": [{"LOWER": "chocolaty"}], "id": "food-bias"}
790
+ {"label": "adjectives", "pattern": [{"LOWER": "cold"}], "id": "food-bias"}
791
+ {"label": "adjectives", "pattern": [{"LOWER": "creamy"}], "id": "food-bias"}
792
+ {"label": "adjectives", "pattern": [{"LOWER": "crispy"}], "id": "food-bias"}
793
+ {"label": "adjectives", "pattern": [{"LOWER": "crunchy"}], "id": "food-bias"}
794
+ {"label": "adjectives", "pattern": [{"LOWER": "delicious"}], "id": "food-bias"}
795
+ {"label": "adjectives", "pattern": [{"LOWER": "doughy"}], "id": "food-bias"}
796
+ {"label": "adjectives", "pattern": [{"LOWER": "dry"}], "id": "food-bias"}
797
+ {"label": "adjectives", "pattern": [{"LOWER": "flavorful"}], "id": "food-bias"}
798
+ {"label": "adjectives", "pattern": [{"LOWER": "frozen"}], "id": "food-bias"}
799
+ {"label": "adjectives", "pattern": [{"LOWER": "golden"}], "id": "food-bias"}
800
+ {"label": "adjectives", "pattern": [{"LOWER": "gourmet"}], "id": "food-bias"}
801
+ {"label": "adjectives", "pattern": [{"LOWER": "greasy"}], "id": "food-bias"}
802
+ {"label": "adjectives", "pattern": [{"LOWER": "grilled"}], "id": "food-bias"}
803
+ {"label": "adjectives", "pattern": [{"LOWER": "icy"}], "id": "food-bias"}
804
+ {"label": "adjectives", "pattern": [{"LOWER": "intense"}], "id": "food-bias"}
805
+ {"label": "adjectives", "pattern": [{"LOWER": "jellied"}], "id": "food-bias"}
806
+ {"label": "adjectives", "pattern": [{"LOWER": "juicy"}], "id": "food-bias"}
807
+ {"label": "adjectives", "pattern": [{"LOWER": "jumbo"}], "id": "food-bias"}
808
+ {"label": "adjectives", "pattern": [{"LOWER": "lean"}], "id": "food-bias"}
809
+ {"label": "adjectives", "pattern": [{"LOWER": "marinated"}], "id": "food-bias"}
810
+ {"label": "adjectives", "pattern": [{"LOWER": "mashed"}], "id": "food-bias"}
811
+ {"label": "adjectives", "pattern": [{"LOWER": "mild"}], "id": "food-bias"}
812
+ {"label": "adjectives", "pattern": [{"LOWER": "minty"}], "id": "food-bias"}
813
+ {"label": "adjectives", "pattern": [{"LOWER": "nutty"}], "id": "food-bias"}
814
+ {"label": "adjectives", "pattern": [{"LOWER": "organic"}], "id": "food-bias"}
815
+ {"label": "adjectives", "pattern": [{"LOWER": "piquant"}], "id": "food-bias"}
816
+ {"label": "adjectives", "pattern": [{"LOWER": "plain"}], "id": "food-bias"}
817
+ {"label": "adjectives", "pattern": [{"LOWER": "poached"}], "id": "food-bias"}
818
+ {"label": "adjectives", "pattern": [{"LOWER": "pounded"}], "id": "food-bias"}
819
+ {"label": "adjectives", "pattern": [{"LOWER": "prepared"}], "id": "food-bias"}
820
+ {"label": "adjectives", "pattern": [{"LOWER": "pureed"}], "id": "food-bias"}
821
+ {"label": "adjectives", "pattern": [{"LOWER": "rancid"}], "id": "food-bias"}
822
+ {"label": "adjectives", "pattern": [{"LOWER": "rank"}], "id": "food-bias"}
823
+ {"label": "adjectives", "pattern": [{"LOWER": "rich"}], "id": "food-bias"}
824
+ {"label": "adjectives", "pattern": [{"LOWER": "ripe"}], "id": "food-bias"}
825
+ {"label": "adjectives", "pattern": [{"LOWER": "rubbery"}], "id": "food-bias"}
826
+ {"label": "adjectives", "pattern": [{"LOWER": "salty"}], "id": "food-bias"}
827
+ {"label": "adjectives", "pattern": [{"LOWER": "saucy"}], "id": "food-bias"}
828
+ {"label": "adjectives", "pattern": [{"LOWER": "savory"}], "id": "food-bias"}
829
+ {"label": "adjectives", "pattern": [{"LOWER": "seasoned"}], "id": "food-bias"}
830
+ {"label": "adjectives", "pattern": [{"LOWER": "sharp"}], "id": "food-bias"}
831
+ {"label": "adjectives", "pattern": [{"LOWER": "simmered"}], "id": "food-bias"}
832
+ {"label": "adjectives", "pattern": [{"LOWER": "smoked"}], "id": "food-bias"}
833
+ {"label": "adjectives", "pattern": [{"LOWER": "smoky"}], "id": "food-bias"}
834
+ {"label": "adjectives", "pattern": [{"LOWER": "sour"}], "id": "food-bias"}
835
+ {"label": "adjectives", "pattern": [{"LOWER": "spicy"}], "id": "food-bias"}
836
+ {"label": "adjectives", "pattern": [{"LOWER": "steamed"}], "id": "food-bias"}
837
+ {"label": "adjectives", "pattern": [{"LOWER": "sticky"}], "id": "food-bias"}
838
+ {"label": "adjectives", "pattern": [{"LOWER": "stringy"}], "id": "food-bias"}
839
+ {"label": "adjectives", "pattern": [{"LOWER": "strong"}], "id": "food-bias"}
840
+ {"label": "adjectives", "pattern": [{"LOWER": "succulent"}], "id": "food-bias"}
841
+ {"label": "adjectives", "pattern": [{"LOWER": "sugary"}], "id": "food-bias"}
842
+ {"label": "adjectives", "pattern": [{"LOWER": "sweet"}], "id": "food-bias"}
843
+ {"label": "adjectives", "pattern": [{"LOWER": "syrupy"}], "id": "food-bias"}
844
+ {"label": "adjectives", "pattern": [{"LOWER": "tangy"}], "id": "food-bias"}
845
+ {"label": "adjectives", "pattern": [{"LOWER": "tart"}], "id": "food-bias"}
846
+ {"label": "adjectives", "pattern": [{"LOWER": "tender"}], "id": "food-bias"}
847
+ {"label": "adjectives", "pattern": [{"LOWER": "toasted"}], "id": "food-bias"}
848
+ {"label": "adjectives", "pattern": [{"LOWER": "topped"}], "id": "food-bias"}
849
+ {"label": "adjectives", "pattern": [{"LOWER": "tossed"}], "id": "food-bias"}
850
+ {"label": "adjectives", "pattern": [{"LOWER": "yummy"}], "id": "food-bias"}
851
+ {"label": "adjectives", "pattern": [{"LOWER": "zingy"}], "id": "food-bias"}
852
+ {"label": "adjectives", "pattern": [{"LOWER": "braised"}], "id": "food-bias"}
853
+ {"label": "adjectives", "pattern": [{"LOWER": "fried"}], "id": "food-bias"}
854
+ {"label": "adjectives", "pattern": [{"LOWER": "fermented"}], "id": "food-bias"}
855
+ {"label": "adjectives", "pattern": [{"LOWER": "milky"}], "id": "food-bias"}
856
+ {"label": "adjectives", "pattern": [{"LOWER": "damaged"}], "id": "food-bias"}
857
+ {"label": "adjectives", "pattern": [{"LOWER": "spicy"}], "id": "food-bias"}
858
+ {"label": "adjectives", "pattern": [{"LOWER": "edible"}], "id": "food-bias"}
859
+ {"label": "adjectives", "pattern": [{"LOWER": "nutritious"}], "id": "food-bias"}
860
+ {"label": "adjectives", "pattern": [{"LOWER": "citric"}], "id": "food-bias"}
861
+ {"label": "adjectives", "pattern": [{"LOWER": "cloying"}], "id": "food-bias"}
862
+ {"label": "adjectives", "pattern": [{"LOWER": "caramelized"}], "id": "food-bias"}
NER-tweaks/age-bias.jsonl ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"label": "age", "pattern": [{"LOWER": "advanced"}], "id": "age-bias"}
2
+ {"label": "age", "pattern": [{"LOWER": "aged"}], "id": "age-bias"}
3
+ {"label": "age", "pattern": [{"LOWER": "ancient"}], "id": "age-bias"}
4
+ {"label": "age", "pattern": [{"LOWER": "antique"}], "id": "age-bias"}
5
+ {"label": "age", "pattern": [{"LOWER": "archaic"}], "id": "age-bias"}
6
+ {"label": "age", "pattern": [{"LOWER": "contemporary"}], "id": "age-bias"}
7
+ {"label": "age", "pattern": [{"LOWER": "current"}], "id": "age-bias"}
8
+ {"label": "age", "pattern": [{"LOWER": "frayed"}], "id": "age-bias"}
9
+ {"label": "age", "pattern": [{"LOWER": "fresh"}], "id": "age-bias"}
10
+ {"label": "age", "pattern": [{"LOWER": "grizzled"}], "id": "age-bias"}
11
+ {"label": "age", "pattern": [{"LOWER": "hoary"}], "id": "age-bias"}
12
+ {"label": "age", "pattern": [{"LOWER": "immature"}], "id": "age-bias"}
13
+ {"label": "age", "pattern": [{"LOWER": "juvenile"}], "id": "age-bias"}
14
+ {"label": "age", "pattern": [{"LOWER": "mature"}], "id": "age-bias"}
15
+ {"label": "age", "pattern": [{"LOWER": "modern"}], "id": "age-bias"}
16
+ {"label": "age", "pattern": [{"LOWER": "new"}], "id": "age-bias"}
17
+ {"label": "age", "pattern": [{"LOWER": "novel"}], "id": "age-bias"}
18
+ {"label": "age", "pattern": [{"LOWER": "obsolete"}], "id": "age-bias"}
19
+ {"label": "age", "pattern": [{"LOWER": "old"}], "id": "age-bias"}
20
+ {"label": "age", "pattern": [{"LOWER": "primordial"}], "id": "age-bias"}
21
+ {"label": "age", "pattern": [{"LOWER": "ragged"}], "id": "age-bias"}
22
+ {"label": "age", "pattern": [{"LOWER": "raw"}], "id": "age-bias"}
23
+ {"label": "age", "pattern": [{"LOWER": "recent"}], "id": "age-bias"}
24
+ {"label": "age", "pattern": [{"LOWER": "senile"}], "id": "age-bias"}
25
+ {"label": "age", "pattern": [{"LOWER": "shabby"}], "id": "age-bias"}
26
+ {"label": "age", "pattern": [{"LOWER": "stale"}], "id": "age-bias"}
27
+ {"label": "age", "pattern": [{"LOWER": "tattered"}], "id": "age-bias"}
28
+ {"label": "age", "pattern": [{"LOWER": "threadbare"}], "id": "age-bias"}
29
+ {"label": "age", "pattern": [{"LOWER": "trite"}], "id": "age-bias"}
30
+ {"label": "age", "pattern": [{"LOWER": "vintage"}], "id": "age-bias"}
31
+ {"label": "age", "pattern": [{"LOWER": "worn"}], "id": "age-bias"}
32
+ {"label": "age", "pattern": [{"LOWER": "young"}], "id": "age-bias"}
NER-tweaks/entity-ruler-input.jsonl ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"label": "GENDER", "pattern": [{"LOWER": "woman"}],"id":"female-bias"}
2
+ {"label": "GENDER", "pattern": [{"LOWER": "feminine"}],"id":"female-bias"}
3
+ {"label": "GENDER", "pattern": [{"LOWER": "female"}],"id":"female-bias"}
4
+ {"label": "GENDER", "pattern": [{"LOWER": "lady"}],"id":"female-bias"}
5
+ {"label": "GENDER", "pattern": [{"LOWER": "girl"}],"id":"female-bias"}
6
+ {"label": "GENDER", "pattern": [{"LOWER": "she"}],"id":"female-bias"}
7
+ {"label": "GENDER", "pattern": [{"LOWER": "her"}],"id":"female-bias"}
8
+ {"label": "GENDER", "pattern": [{"LOWER": "hers"}],"id":"female-bias"}
9
+ {"label": "GENDER", "pattern": [{"LOWER": "herself"}],"id":"female-bias"}
10
+ {"label": "GENDER", "pattern": [{"LOWER": "mother"}],"id":"female-bias"}
11
+ {"label": "GENDER", "pattern": [{"LOWER": "grandmother"}],"id":"female-bias"}
12
+ {"label": "GENDER", "pattern": [{"LOWER": "grandma"}],"id":"female-bias"}
13
+ {"label": "GENDER", "pattern": [{"LOWER": "momma"}],"id":"female-bias"}
14
+ {"label": "GENDER", "pattern": [{"LOWER": "mommy"}],"id":"female-bias"}
15
+ {"label": "GENDER", "pattern": [{"LOWER": "babe"}],"id":"female-bias"}
16
+ {"label": "GENDER", "pattern": [{"LOWER": "daughter"}],"id":"female-bias"}
17
+ {"label": "GENDER", "pattern": [{"LOWER": "sister"}],"id":"female-bias"}
18
+ {"label": "GENDER", "pattern": [{"LOWER": "niece"}],"id":"female-bias"}
19
+ {"label": "GENDER", "pattern": [{"LOWER": "aunt"}],"id":"female-bias"}
20
+ {"label": "GENDER", "pattern": [{"LOWER": "girlfriend"}],"id":"female-bias"}
21
+ {"label": "GENDER", "pattern": [{"LOWER": "wife"}],"id":"female-bias"}
22
+ {"label": "GENDER", "pattern": [{"LOWER": "mistress"}],"id":"female-bias"}
23
+ {"label": "GENDER", "pattern": [{"LOWER": "man"}],"id":"male-bias"}
24
+ {"label": "GENDER", "pattern": [{"LOWER": "masculine"}],"id":"male-bias"}
25
+ {"label": "GENDER", "pattern": [{"LOWER": "male"}],"id":"male-bias"}
26
+ {"label": "GENDER", "pattern": [{"LOWER": "dude"}],"id":"male-bias"}
27
+ {"label": "GENDER", "pattern": [{"LOWER": "boy"}],"id":"male-bias"}
28
+ {"label": "GENDER", "pattern": [{"LOWER": "he"}],"id":"male-bias"}
29
+ {"label": "GENDER", "pattern": [{"LOWER": "his"}],"id":"male-bias"}
30
+ {"label": "GENDER", "pattern": [{"LOWER": "him"}],"id":"male-bias"}
31
+ {"label": "GENDER", "pattern": [{"LOWER": "himself"}],"id":"male-bias"}
32
+ {"label": "GENDER", "pattern": [{"LOWER": "father"}],"id":"male-bias"}
33
+ {"label": "GENDER", "pattern": [{"LOWER": "grandfather"}],"id":"male-bias"}
34
+ {"label": "GENDER", "pattern": [{"LOWER": "grandpa"}],"id":"male-bias"}
35
+ {"label": "GENDER", "pattern": [{"LOWER": "poppa"}],"id":"male-bias"}
36
+ {"label": "GENDER", "pattern": [{"LOWER": "daddy"}],"id":"male-bias"}
37
+ {"label": "GENDER", "pattern": [{"LOWER": "lad"}],"id":"male-bias"}
38
+ {"label": "GENDER", "pattern": [{"LOWER": "son"}],"id":"male-bias"}
39
+ {"label": "GENDER", "pattern": [{"LOWER": "brother"}],"id":"male-bias"}
40
+ {"label": "GENDER", "pattern": [{"LOWER": "nephew"}],"id":"male-bias"}
41
+ {"label": "GENDER", "pattern": [{"LOWER": "uncle"}],"id":"male-bias"}
42
+ {"label": "GENDER", "pattern": [{"LOWER": "boyfriend"}],"id":"male-bias"}
43
+ {"label": "GENDER", "pattern": [{"LOWER": "husband"}],"id":"male-bias"}
44
+ {"label": "GENDER", "pattern": [{"LOWER": "gentleman"}],"id":"male-bias"}
NER-tweaks/gender-test.jsonl ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"label": "SOGI", "pattern": [{"LOWER": "woman", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
2
+ {"label": "SOGI", "pattern": [{"LOWER": "feminine", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
3
+ {"label": "SOGI", "pattern": [{"LOWER": "female", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
4
+ {"label": "SOGI", "pattern": [{"LOWER": "lady", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
5
+ {"label": "SOGI", "pattern": [{"LOWER": "girl", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
6
+ {"label": "SOGI", "pattern": [{"LOWER": "she", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
7
+ {"label": "SOGI", "pattern": [{"LOWER": "hers", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
8
+ {"label": "SOGI", "pattern": [{"LOWER": "her", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
9
+ {"label": "SOGI", "pattern": [{"LOWER": "herself", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
10
+ {"label": "SOGI", "pattern": [{"LOWER": "mother", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
11
+ {"label": "SOGI", "pattern": [{"LOWER": "grandmother", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
12
+ {"label": "SOGI", "pattern": [{"LOWER": "grandma", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
13
+ {"label": "SOGI", "pattern": [{"LOWER": "momma", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
14
+ {"label": "SOGI", "pattern": [{"LOWER": "mommy", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
15
+ {"label": "SOGI", "pattern": [{"LOWER": "babe", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
16
+ {"label": "SOGI", "pattern": [{"LOWER": "daughter", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
17
+ {"label": "SOGI", "pattern": [{"LOWER": "sister", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
18
+ {"label": "SOGI", "pattern": [{"LOWER": "niece", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
19
+ {"label": "SOGI", "pattern": [{"LOWER": "aunt", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
20
+ {"label": "SOGI", "pattern": [{"LOWER": "girlfriend", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
21
+ {"label": "SOGI", "pattern": [{"LOWER": "wife", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
22
+ {"label": "SOGI", "pattern": [{"LOWER": "mistress", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
23
+ {"label": "SOGI", "pattern": [{"LOWER": "man", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
24
+ {"label": "SOGI", "pattern": [{"LOWER": "masculine", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
25
+ {"label": "SOGI", "pattern": [{"LOWER": "male", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
26
+ {"label": "SOGI", "pattern": [{"LOWER": "dude", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
27
+ {"label": "SOGI", "pattern": [{"LOWER": "boy", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
28
+ {"label": "SOGI", "pattern": [{"LOWER": "he", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
29
+ {"label": "SOGI", "pattern": [{"LOWER": "his", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
30
+ {"label": "SOGI", "pattern": [{"LOWER": "him", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
31
+ {"label": "SOGI", "pattern": [{"LOWER": "himself", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
32
+ {"label": "SOGI", "pattern": [{"LOWER": "father", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
33
+ {"label": "SOGI", "pattern": [{"LOWER": "grandfather", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
34
+ {"label": "SOGI", "pattern": [{"LOWER": "grandpa", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
35
+ {"label": "SOGI", "pattern": [{"LOWER": "poppa", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
36
+ {"label": "SOGI", "pattern": [{"LOWER": "daddy", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
37
+ {"label": "SOGI", "pattern": [{"LOWER": "lad", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
38
+ {"label": "SOGI", "pattern": [{"LOWER": "son", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
39
+ {"label": "SOGI", "pattern": [{"LOWER": "brother", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
40
+ {"label": "SOGI", "pattern": [{"LOWER": "nephew", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
41
+ {"label": "SOGI", "pattern": [{"LOWER": "uncle", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
42
+ {"label": "SOGI", "pattern": [{"LOWER": "boyfriend", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
43
+ {"label": "SOGI", "pattern": [{"LOWER": "husband", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
44
+ {"label": "SOGI", "pattern": [{"LOWER": "gentleman", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
45
+ {"label": "SOGI", "pattern": [{"LOWER": "trans"}, {"text":"-"}, {"LOWER":"woman"}],"id":"lbgtq-bias"}
46
+ {"label": "SOGI", "pattern": [{"LOWER": "trans"}, {"text":"-"}, {"LOWER":"man"}],"id":"lbgtq-bias"}
47
+ {"label": "SOGI", "pattern": [{"LOWER": "transsexual"}],"id":"lbgtq-bias"}
48
+ {"label": "SOGI", "pattern": [{"LOWER": "bisexual"}],"id":"lbgtq-bias"}
49
+ {"label": "SOGI", "pattern": [{"LOWER": "gay"}],"id":"lbgtq-bias"}
50
+ {"label": "SOGI", "pattern": [{"LOWER": "gender"}, {"text":"-"}, {"LOWER":"fluid"}],"id":"lbgtq-bias"}
51
+ {"label": "SOGI", "pattern": [{"LOWER": "transexual"}],"id":"lbgtq-bias"}
52
+ {"label": "SOGI", "pattern": [{"LOWER": "genderqueer"}],"id":"lbgtq-bias"}
53
+ {"label": "SOGI", "pattern": [{"LOWER": "lesbian"}],"id":"lbgtq-bias"}
54
+ {"label": "SOGI", "pattern": [{"LOWER": "non"}, {"text":"-"}, {"LOWER":"binary"}],"id":"lbgtq-bias"}
55
+ {"label": "SOGI", "pattern": [{"LOWER": "queer"}],"id":"lbgtq-bias"}
56
+ {"label": "SOGI", "pattern": [{"LOWER": "pansexual"}],"id":"lbgtq-bias"}
57
+ {"label": "SOGI", "pattern": [{"LOWER": "transgender"}],"id":"lbgtq-bias"}
58
+ {"label": "SOGI", "pattern": [{"LOWER": "transwoman"}],"id":"lbgtq-bias"}
59
+ {"label": "SOGI", "pattern": [{"LOWER": "transman"}],"id":"lbgtq-bias"}
NER-tweaks/main-ruler-bias.jsonl ADDED
@@ -0,0 +1,862 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"label": "SOGI", "pattern": [{"LOWER": "woman", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
2
+ {"label": "SOGI", "pattern": [{"LOWER": "feminine", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
3
+ {"label": "SOGI", "pattern": [{"LOWER": "female", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
4
+ {"label": "SOGI", "pattern": [{"LOWER": "lady", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
5
+ {"label": "SOGI", "pattern": [{"LOWER": "girl", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
6
+ {"label": "SOGI", "pattern": [{"LOWER": "she", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
7
+ {"label": "SOGI", "pattern": [{"LOWER": "hers", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
8
+ {"label": "SOGI", "pattern": [{"LOWER": "her", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
9
+ {"label": "SOGI", "pattern": [{"LOWER": "herself", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
10
+ {"label": "SOGI", "pattern": [{"LOWER": "mother", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
11
+ {"label": "SOGI", "pattern": [{"LOWER": "grandmother", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
12
+ {"label": "SOGI", "pattern": [{"LOWER": "grandma", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
13
+ {"label": "SOGI", "pattern": [{"LOWER": "momma", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
14
+ {"label": "SOGI", "pattern": [{"LOWER": "mommy", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
15
+ {"label": "SOGI", "pattern": [{"LOWER": "babe", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
16
+ {"label": "SOGI", "pattern": [{"LOWER": "daughter", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
17
+ {"label": "SOGI", "pattern": [{"LOWER": "sister", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
18
+ {"label": "SOGI", "pattern": [{"LOWER": "niece", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
19
+ {"label": "SOGI", "pattern": [{"LOWER": "aunt", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
20
+ {"label": "SOGI", "pattern": [{"LOWER": "girlfriend", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
21
+ {"label": "SOGI", "pattern": [{"LOWER": "wife", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
22
+ {"label": "SOGI", "pattern": [{"LOWER": "mistress", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"female-bias"}
23
+ {"label": "SOGI", "pattern": [{"LOWER": "man", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
24
+ {"label": "SOGI", "pattern": [{"LOWER": "masculine", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
25
+ {"label": "SOGI", "pattern": [{"LOWER": "male", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
26
+ {"label": "SOGI", "pattern": [{"LOWER": "dude", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
27
+ {"label": "SOGI", "pattern": [{"LOWER": "boy", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
28
+ {"label": "SOGI", "pattern": [{"LOWER": "he", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
29
+ {"label": "SOGI", "pattern": [{"LOWER": "his", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
30
+ {"label": "SOGI", "pattern": [{"LOWER": "him", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
31
+ {"label": "SOGI", "pattern": [{"LOWER": "himself", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
32
+ {"label": "SOGI", "pattern": [{"LOWER": "father", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
33
+ {"label": "SOGI", "pattern": [{"LOWER": "grandfather", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
34
+ {"label": "SOGI", "pattern": [{"LOWER": "grandpa", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
35
+ {"label": "SOGI", "pattern": [{"LOWER": "poppa", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
36
+ {"label": "SOGI", "pattern": [{"LOWER": "daddy", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
37
+ {"label": "SOGI", "pattern": [{"LOWER": "lad", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
38
+ {"label": "SOGI", "pattern": [{"LOWER": "son", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
39
+ {"label": "SOGI", "pattern": [{"LOWER": "brother", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
40
+ {"label": "SOGI", "pattern": [{"LOWER": "nephew", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
41
+ {"label": "SOGI", "pattern": [{"LOWER": "uncle", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
42
+ {"label": "SOGI", "pattern": [{"LOWER": "boyfriend", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
43
+ {"label": "SOGI", "pattern": [{"LOWER": "husband", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
44
+ {"label": "SOGI", "pattern": [{"LOWER": "gentleman", "POS": {"IN": ["NOUN", "PRON"]}}],"id":"male-bias"}
45
+ {"label": "SOGI", "pattern": [{"LOWER": "trans"}, {"text":"-"}, {"LOWER":"woman"}],"id":"lbgtq-bias"}
46
+ {"label": "SOGI", "pattern": [{"LOWER": "trans"}, {"text":"-"}, {"LOWER":"man"}],"id":"lbgtq-bias"}
47
+ {"label": "SOGI", "pattern": [{"LOWER": "transsexual"}],"id":"lbgtq-bias"}
48
+ {"label": "SOGI", "pattern": [{"LOWER": "bisexual"}],"id":"lbgtq-bias"}
49
+ {"label": "SOGI", "pattern": [{"LOWER": "gay"}],"id":"lbgtq-bias"}
50
+ {"label": "SOGI", "pattern": [{"LOWER": "gender"}, {"text":"-"}, {"LOWER":"fluid"}],"id":"lbgtq-bias"}
51
+ {"label": "SOGI", "pattern": [{"LOWER": "transexual"}],"id":"lbgtq-bias"}
52
+ {"label": "SOGI", "pattern": [{"LOWER": "genderqueer"}],"id":"lbgtq-bias"}
53
+ {"label": "SOGI", "pattern": [{"LOWER": "lesbian"}],"id":"lbgtq-bias"}
54
+ {"label": "SOGI", "pattern": [{"LOWER": "non"}, {"text":"-"}, {"LOWER":"binary"}],"id":"lbgtq-bias"}
55
+ {"label": "SOGI", "pattern": [{"LOWER": "queer"}],"id":"lbgtq-bias"}
56
+ {"label": "SOGI", "pattern": [{"LOWER": "pansexual"}],"id":"lbgtq-bias"}
57
+ {"label": "SOGI", "pattern": [{"LOWER": "transgender"}],"id":"lbgtq-bias"}
58
+ {"label": "SOGI", "pattern": [{"LOWER": "transwoman"}],"id":"lbgtq-bias"}
59
+ {"label": "SOGI", "pattern": [{"LOWER": "transman"}],"id":"lbgtq-bias"}
60
+ {"label": "adjectives", "pattern": [{"LOWER": "agile"}], "id": "speed-bias"}
61
+ {"label": "adjectives", "pattern": [{"LOWER": "express"}], "id": "speed-bias"}
62
+ {"label": "adjectives", "pattern": [{"LOWER": "fast"}], "id": "speed-bias"}
63
+ {"label": "adjectives", "pattern": [{"LOWER": "hasty"}], "id": "speed-bias"}
64
+ {"label": "adjectives", "pattern": [{"LOWER": "immediate"}], "id": "speed-bias"}
65
+ {"label": "adjectives", "pattern": [{"LOWER": "instant"}], "id": "speed-bias"}
66
+ {"label": "adjectives", "pattern": [{"LOWER": "late"}], "id": "speed-bias"}
67
+ {"label": "adjectives", "pattern": [{"LOWER": "lazy"}], "id": "speed-bias"}
68
+ {"label": "adjectives", "pattern": [{"LOWER": "nimble"}], "id": "speed-bias"}
69
+ {"label": "adjectives", "pattern": [{"LOWER": "poky"}], "id": "speed-bias"}
70
+ {"label": "adjectives", "pattern": [{"LOWER": "prompt"}], "id": "speed-bias"}
71
+ {"label": "adjectives", "pattern": [{"LOWER": "quick"}], "id": "speed-bias"}
72
+ {"label": "adjectives", "pattern": [{"LOWER": "rapid"}], "id": "speed-bias"}
73
+ {"label": "adjectives", "pattern": [{"LOWER": "slow"}], "id": "speed-bias"}
74
+ {"label": "adjectives", "pattern": [{"LOWER": "sluggish"}], "id": "speed-bias"}
75
+ {"label": "adjectives", "pattern": [{"LOWER": "speedy"}], "id": "speed-bias"}
76
+ {"label": "adjectives", "pattern": [{"LOWER": "spry"}], "id": "speed-bias"}
77
+ {"label": "adjectives", "pattern": [{"LOWER": "swift"}], "id": "speed-bias"}
78
+ {"label": "adjectives", "pattern": [{"LOWER": "arctic"}], "id": "weather-bias"}
79
+ {"label": "adjectives", "pattern": [{"LOWER": "arid"}], "id": "weather-bias"}
80
+ {"label": "adjectives", "pattern": [{"LOWER": "breezy"}], "id": "weather-bias"}
81
+ {"label": "adjectives", "pattern": [{"LOWER": "calm"}], "id": "weather-bias"}
82
+ {"label": "adjectives", "pattern": [{"LOWER": "chilly"}], "id": "weather-bias"}
83
+ {"label": "adjectives", "pattern": [{"LOWER": "cloudy"}], "id": "weather-bias"}
84
+ {"label": "adjectives", "pattern": [{"LOWER": "cold"}], "id": "weather-bias"}
85
+ {"label": "adjectives", "pattern": [{"LOWER": "cool"}], "id": "weather-bias"}
86
+ {"label": "adjectives", "pattern": [{"LOWER": "damp"}], "id": "weather-bias"}
87
+ {"label": "adjectives", "pattern": [{"LOWER": "dark"}], "id": "weather-bias"}
88
+ {"label": "adjectives", "pattern": [{"LOWER": "dry"}], "id": "weather-bias"}
89
+ {"label": "adjectives", "pattern": [{"LOWER": "foggy"}], "id": "weather-bias"}
90
+ {"label": "adjectives", "pattern": [{"LOWER": "freezing"}], "id": "weather-bias"}
91
+ {"label": "adjectives", "pattern": [{"LOWER": "frosty"}], "id": "weather-bias"}
92
+ {"label": "adjectives", "pattern": [{"LOWER": "great"}], "id": "weather-bias"}
93
+ {"label": "adjectives", "pattern": [{"LOWER": "hot"}], "id": "weather-bias"}
94
+ {"label": "adjectives", "pattern": [{"LOWER": "humid"}], "id": "weather-bias"}
95
+ {"label": "adjectives", "pattern": [{"LOWER": "icy"}], "id": "weather-bias"}
96
+ {"label": "adjectives", "pattern": [{"LOWER": "light"}], "id": "weather-bias"}
97
+ {"label": "adjectives", "pattern": [{"LOWER": "mild"}], "id": "weather-bias"}
98
+ {"label": "adjectives", "pattern": [{"LOWER": "nice"}], "id": "weather-bias"}
99
+ {"label": "adjectives", "pattern": [{"LOWER": "overcast"}], "id": "weather-bias"}
100
+ {"label": "adjectives", "pattern": [{"LOWER": "rainy"}], "id": "weather-bias"}
101
+ {"label": "adjectives", "pattern": [{"LOWER": "smoggy"}], "id": "weather-bias"}
102
+ {"label": "adjectives", "pattern": [{"LOWER": "snowy"}], "id": "weather-bias"}
103
+ {"label": "adjectives", "pattern": [{"LOWER": "sunny"}], "id": "weather-bias"}
104
+ {"label": "adjectives", "pattern": [{"LOWER": "warm"}], "id": "weather-bias"}
105
+ {"label": "adjectives", "pattern": [{"LOWER": "windy"}], "id": "weather-bias"}
106
+ {"label": "adjectives", "pattern": [{"LOWER": "wintry"}], "id": "weather-bias"}
107
+ {"label": "adjectives", "pattern": [{"LOWER": "bent"}], "id": "shape-bias"}
108
+ {"label": "adjectives", "pattern": [{"LOWER": "blocky"}], "id": "shape-bias"}
109
+ {"label": "adjectives", "pattern": [{"LOWER": "boxy"}], "id": "shape-bias"}
110
+ {"label": "adjectives", "pattern": [{"LOWER": "broad"}], "id": "shape-bias"}
111
+ {"label": "adjectives", "pattern": [{"LOWER": "chunky"}], "id": "shape-bias"}
112
+ {"label": "adjectives", "pattern": [{"LOWER": "compact"}], "id": "shape-bias"}
113
+ {"label": "adjectives", "pattern": [{"LOWER": "fat"}], "id": "shape-bias"}
114
+ {"label": "adjectives", "pattern": [{"LOWER": "flat"}], "id": "shape-bias"}
115
+ {"label": "adjectives", "pattern": [{"LOWER": "full"}], "id": "shape-bias"}
116
+ {"label": "adjectives", "pattern": [{"LOWER": "narrow"}], "id": "shape-bias"}
117
+ {"label": "adjectives", "pattern": [{"LOWER": "pointed"}], "id": "shape-bias"}
118
+ {"label": "adjectives", "pattern": [{"LOWER": "round"}], "id": "shape-bias"}
119
+ {"label": "adjectives", "pattern": [{"LOWER": "rounded"}], "id": "shape-bias"}
120
+ {"label": "adjectives", "pattern": [{"LOWER": "skinny"}], "id": "shape-bias"}
121
+ {"label": "adjectives", "pattern": [{"LOWER": "slim"}], "id": "shape-bias"}
122
+ {"label": "adjectives", "pattern": [{"LOWER": "solid"}], "id": "shape-bias"}
123
+ {"label": "adjectives", "pattern": [{"LOWER": "straight"}], "id": "shape-bias"}
124
+ {"label": "adjectives", "pattern": [{"LOWER": "thick"}], "id": "shape-bias"}
125
+ {"label": "adjectives", "pattern": [{"LOWER": "thin"}], "id": "shape-bias"}
126
+ {"label": "adjectives", "pattern": [{"LOWER": "wide"}], "id": "shape-bias"}
127
+ {"label": "adjectives", "pattern": [{"LOWER": "blaring"}], "id": "sound-bias"}
128
+ {"label": "adjectives", "pattern": [{"LOWER": "booming"}], "id": "sound-bias"}
129
+ {"label": "adjectives", "pattern": [{"LOWER": "deafening"}], "id": "sound-bias"}
130
+ {"label": "adjectives", "pattern": [{"LOWER": "faint"}], "id": "sound-bias"}
131
+ {"label": "adjectives", "pattern": [{"LOWER": "gentle"}], "id": "sound-bias"}
132
+ {"label": "adjectives", "pattern": [{"LOWER": "grating"}], "id": "sound-bias"}
133
+ {"label": "adjectives", "pattern": [{"LOWER": "hushed"}], "id": "sound-bias"}
134
+ {"label": "adjectives", "pattern": [{"LOWER": "loud"}], "id": "sound-bias"}
135
+ {"label": "adjectives", "pattern": [{"LOWER": "muffled"}], "id": "sound-bias"}
136
+ {"label": "adjectives", "pattern": [{"LOWER": "mute"}], "id": "sound-bias"}
137
+ {"label": "adjectives", "pattern": [{"LOWER": "noisy"}], "id": "sound-bias"}
138
+ {"label": "adjectives", "pattern": [{"LOWER": "piercing"}], "id": "sound-bias"}
139
+ {"label": "adjectives", "pattern": [{"LOWER": "quiet"}], "id": "sound-bias"}
140
+ {"label": "adjectives", "pattern": [{"LOWER": "roaring"}], "id": "sound-bias"}
141
+ {"label": "adjectives", "pattern": [{"LOWER": "rowdy"}], "id": "sound-bias"}
142
+ {"label": "adjectives", "pattern": [{"LOWER": "silent"}], "id": "sound-bias"}
143
+ {"label": "adjectives", "pattern": [{"LOWER": "soft"}], "id": "sound-bias"}
144
+ {"label": "adjectives", "pattern": [{"LOWER": "thundering"}], "id": "sound-bias"}
145
+ {"label": "adjectives", "pattern": [{"LOWER": "absolute"}], "id": "physics-bias"}
146
+ {"label": "adjectives", "pattern": [{"LOWER": "achromatic"}], "id": "physics-bias"}
147
+ {"label": "adjectives", "pattern": [{"LOWER": "acoustic"}], "id": "physics-bias"}
148
+ {"label": "adjectives", "pattern": [{"LOWER": "adiabatic"}], "id": "physics-bias"}
149
+ {"label": "adjectives", "pattern": [{"LOWER": "alternating"}], "id": "physics-bias"}
150
+ {"label": "adjectives", "pattern": [{"LOWER": "atomic"}], "id": "physics-bias"}
151
+ {"label": "adjectives", "pattern": [{"LOWER": "binding"}], "id": "physics-bias"}
152
+ {"label": "adjectives", "pattern": [{"LOWER": "brownian"}], "id": "physics-bias"}
153
+ {"label": "adjectives", "pattern": [{"LOWER": "buoyant"}], "id": "physics-bias"}
154
+ {"label": "adjectives", "pattern": [{"LOWER": "chromatic"}], "id": "physics-bias"}
155
+ {"label": "adjectives", "pattern": [{"LOWER": "closed"}], "id": "physics-bias"}
156
+ {"label": "adjectives", "pattern": [{"LOWER": "coherent"}], "id": "physics-bias"}
157
+ {"label": "adjectives", "pattern": [{"LOWER": "critical"}], "id": "physics-bias"}
158
+ {"label": "adjectives", "pattern": [{"LOWER": "dense"}], "id": "physics-bias"}
159
+ {"label": "adjectives", "pattern": [{"LOWER": "direct"}], "id": "physics-bias"}
160
+ {"label": "adjectives", "pattern": [{"LOWER": "electric"}], "id": "physics-bias"}
161
+ {"label": "adjectives", "pattern": [{"LOWER": "electrical"}], "id": "physics-bias"}
162
+ {"label": "adjectives", "pattern": [{"LOWER": "endothermic"}], "id": "physics-bias"}
163
+ {"label": "adjectives", "pattern": [{"LOWER": "exothermic"}], "id": "physics-bias"}
164
+ {"label": "adjectives", "pattern": [{"LOWER": "free"}], "id": "physics-bias"}
165
+ {"label": "adjectives", "pattern": [{"LOWER": "fundamental"}], "id": "physics-bias"}
166
+ {"label": "adjectives", "pattern": [{"LOWER": "gravitational"}], "id": "physics-bias"}
167
+ {"label": "adjectives", "pattern": [{"LOWER": "internal"}], "id": "physics-bias"}
168
+ {"label": "adjectives", "pattern": [{"LOWER": "isobaric"}], "id": "physics-bias"}
169
+ {"label": "adjectives", "pattern": [{"LOWER": "isochoric"}], "id": "physics-bias"}
170
+ {"label": "adjectives", "pattern": [{"LOWER": "isothermal"}], "id": "physics-bias"}
171
+ {"label": "adjectives", "pattern": [{"LOWER": "kinetic"}], "id": "physics-bias"}
172
+ {"label": "adjectives", "pattern": [{"LOWER": "latent"}], "id": "physics-bias"}
173
+ {"label": "adjectives", "pattern": [{"LOWER": "magnetic"}], "id": "physics-bias"}
174
+ {"label": "adjectives", "pattern": [{"LOWER": "mechanical"}], "id": "physics-bias"}
175
+ {"label": "adjectives", "pattern": [{"LOWER": "natural"}], "id": "physics-bias"}
176
+ {"label": "adjectives", "pattern": [{"LOWER": "nuclear"}], "id": "physics-bias"}
177
+ {"label": "adjectives", "pattern": [{"LOWER": "open"}], "id": "physics-bias"}
178
+ {"label": "adjectives", "pattern": [{"LOWER": "optical"}], "id": "physics-bias"}
179
+ {"label": "adjectives", "pattern": [{"LOWER": "potential"}], "id": "physics-bias"}
180
+ {"label": "adjectives", "pattern": [{"LOWER": "primary"}], "id": "physics-bias"}
181
+ {"label": "adjectives", "pattern": [{"LOWER": "progressive"}], "id": "physics-bias"}
182
+ {"label": "adjectives", "pattern": [{"LOWER": "quantum"}], "id": "physics-bias"}
183
+ {"label": "adjectives", "pattern": [{"LOWER": "radiant"}], "id": "physics-bias"}
184
+ {"label": "adjectives", "pattern": [{"LOWER": "radioactive"}], "id": "physics-bias"}
185
+ {"label": "adjectives", "pattern": [{"LOWER": "rectilinear"}], "id": "physics-bias"}
186
+ {"label": "adjectives", "pattern": [{"LOWER": "relative"}], "id": "physics-bias"}
187
+ {"label": "adjectives", "pattern": [{"LOWER": "resolving"}], "id": "physics-bias"}
188
+ {"label": "adjectives", "pattern": [{"LOWER": "resonant"}], "id": "physics-bias"}
189
+ {"label": "adjectives", "pattern": [{"LOWER": "resultant"}], "id": "physics-bias"}
190
+ {"label": "adjectives", "pattern": [{"LOWER": "rigid"}], "id": "physics-bias"}
191
+ {"label": "adjectives", "pattern": [{"LOWER": "volumetric"}], "id": "physics-bias"}
192
+ {"label": "adjectives", "pattern": [{"LOWER": ""}], "id": "temperature-bias"}
193
+ {"label": "adjectives", "pattern": [{"LOWER": "blistering"}], "id": "temperature-bias"}
194
+ {"label": "adjectives", "pattern": [{"LOWER": "burning"}], "id": "temperature-bias"}
195
+ {"label": "adjectives", "pattern": [{"LOWER": "chill"}], "id": "temperature-bias"}
196
+ {"label": "adjectives", "pattern": [{"LOWER": "cold"}], "id": "temperature-bias"}
197
+ {"label": "adjectives", "pattern": [{"LOWER": "cool"}], "id": "temperature-bias"}
198
+ {"label": "adjectives", "pattern": [{"LOWER": "freezing"}], "id": "temperature-bias"}
199
+ {"label": "adjectives", "pattern": [{"LOWER": "frigid"}], "id": "temperature-bias"}
200
+ {"label": "adjectives", "pattern": [{"LOWER": "frosty"}], "id": "temperature-bias"}
201
+ {"label": "adjectives", "pattern": [{"LOWER": "hot"}], "id": "temperature-bias"}
202
+ {"label": "adjectives", "pattern": [{"LOWER": "icy"}], "id": "temperature-bias"}
203
+ {"label": "adjectives", "pattern": [{"LOWER": "molten"}], "id": "temperature-bias"}
204
+ {"label": "adjectives", "pattern": [{"LOWER": "nippy"}], "id": "temperature-bias"}
205
+ {"label": "adjectives", "pattern": [{"LOWER": "scalding"}], "id": "temperature-bias"}
206
+ {"label": "adjectives", "pattern": [{"LOWER": "searing"}], "id": "temperature-bias"}
207
+ {"label": "adjectives", "pattern": [{"LOWER": "sizzling"}], "id": "temperature-bias"}
208
+ {"label": "adjectives", "pattern": [{"LOWER": "warm"}], "id": "temperature-bias"}
209
+ {"label": "adjectives", "pattern": [{"LOWER": "central"}], "id": "corporate_prefixes-bias"}
210
+ {"label": "adjectives", "pattern": [{"LOWER": "chief"}], "id": "corporate_prefixes-bias"}
211
+ {"label": "adjectives", "pattern": [{"LOWER": "corporate"}], "id": "corporate_prefixes-bias"}
212
+ {"label": "adjectives", "pattern": [{"LOWER": "customer"}], "id": "corporate_prefixes-bias"}
213
+ {"label": "adjectives", "pattern": [{"LOWER": "direct"}], "id": "corporate_prefixes-bias"}
214
+ {"label": "adjectives", "pattern": [{"LOWER": "district"}], "id": "corporate_prefixes-bias"}
215
+ {"label": "adjectives", "pattern": [{"LOWER": "dynamic"}], "id": "corporate_prefixes-bias"}
216
+ {"label": "adjectives", "pattern": [{"LOWER": "forward"}], "id": "corporate_prefixes-bias"}
217
+ {"label": "adjectives", "pattern": [{"LOWER": "future"}], "id": "corporate_prefixes-bias"}
218
+ {"label": "adjectives", "pattern": [{"LOWER": "global"}], "id": "corporate_prefixes-bias"}
219
+ {"label": "adjectives", "pattern": [{"LOWER": "human"}], "id": "corporate_prefixes-bias"}
220
+ {"label": "adjectives", "pattern": [{"LOWER": "internal"}], "id": "corporate_prefixes-bias"}
221
+ {"label": "adjectives", "pattern": [{"LOWER": "international"}], "id": "corporate_prefixes-bias"}
222
+ {"label": "adjectives", "pattern": [{"LOWER": "investor"}], "id": "corporate_prefixes-bias"}
223
+ {"label": "adjectives", "pattern": [{"LOWER": "lead"}], "id": "corporate_prefixes-bias"}
224
+ {"label": "adjectives", "pattern": [{"LOWER": "legacy"}], "id": "corporate_prefixes-bias"}
225
+ {"label": "adjectives", "pattern": [{"LOWER": "national"}], "id": "corporate_prefixes-bias"}
226
+ {"label": "adjectives", "pattern": [{"LOWER": "principal"}], "id": "corporate_prefixes-bias"}
227
+ {"label": "adjectives", "pattern": [{"LOWER": "product"}], "id": "corporate_prefixes-bias"}
228
+ {"label": "adjectives", "pattern": [{"LOWER": "regional"}], "id": "corporate_prefixes-bias"}
229
+ {"label": "adjectives", "pattern": [{"LOWER": "senior"}], "id": "corporate_prefixes-bias"}
230
+ {"label": "adjectives", "pattern": [{"LOWER": "staff"}], "id": "corporate_prefixes-bias"}
231
+ {"label": "adjectives", "pattern": [{"LOWER": "bare"}], "id": "complexity-bias"}
232
+ {"label": "adjectives", "pattern": [{"LOWER": "basic"}], "id": "complexity-bias"}
233
+ {"label": "adjectives", "pattern": [{"LOWER": "clear"}], "id": "complexity-bias"}
234
+ {"label": "adjectives", "pattern": [{"LOWER": "complex"}], "id": "complexity-bias"}
235
+ {"label": "adjectives", "pattern": [{"LOWER": "complicated"}], "id": "complexity-bias"}
236
+ {"label": "adjectives", "pattern": [{"LOWER": "convoluted"}], "id": "complexity-bias"}
237
+ {"label": "adjectives", "pattern": [{"LOWER": "direct"}], "id": "complexity-bias"}
238
+ {"label": "adjectives", "pattern": [{"LOWER": "easy"}], "id": "complexity-bias"}
239
+ {"label": "adjectives", "pattern": [{"LOWER": "elaborate"}], "id": "complexity-bias"}
240
+ {"label": "adjectives", "pattern": [{"LOWER": "fancy"}], "id": "complexity-bias"}
241
+ {"label": "adjectives", "pattern": [{"LOWER": "hard"}], "id": "complexity-bias"}
242
+ {"label": "adjectives", "pattern": [{"LOWER": "intricate"}], "id": "complexity-bias"}
243
+ {"label": "adjectives", "pattern": [{"LOWER": "obvious"}], "id": "complexity-bias"}
244
+ {"label": "adjectives", "pattern": [{"LOWER": "plain"}], "id": "complexity-bias"}
245
+ {"label": "adjectives", "pattern": [{"LOWER": "pure"}], "id": "complexity-bias"}
246
+ {"label": "adjectives", "pattern": [{"LOWER": "simple"}], "id": "complexity-bias"}
247
+ {"label": "adjectives", "pattern": [{"LOWER": "amber"}], "id": "colors-bias"}
248
+ {"label": "adjectives", "pattern": [{"LOWER": "ash"}], "id": "colors-bias"}
249
+ {"label": "adjectives", "pattern": [{"LOWER": "asphalt"}], "id": "colors-bias"}
250
+ {"label": "adjectives", "pattern": [{"LOWER": "auburn"}], "id": "colors-bias"}
251
+ {"label": "adjectives", "pattern": [{"LOWER": "avocado"}], "id": "colors-bias"}
252
+ {"label": "adjectives", "pattern": [{"LOWER": "aquamarine"}], "id": "colors-bias"}
253
+ {"label": "adjectives", "pattern": [{"LOWER": "azure"}], "id": "colors-bias"}
254
+ {"label": "adjectives", "pattern": [{"LOWER": "beige"}], "id": "colors-bias"}
255
+ {"label": "adjectives", "pattern": [{"LOWER": "bisque"}], "id": "colors-bias"}
256
+ {"label": "adjectives", "pattern": [{"LOWER": "black"}], "id": "colors-bias"}
257
+ {"label": "adjectives", "pattern": [{"LOWER": "blue"}], "id": "colors-bias"}
258
+ {"label": "adjectives", "pattern": [{"LOWER": "bone"}], "id": "colors-bias"}
259
+ {"label": "adjectives", "pattern": [{"LOWER": "bordeaux"}], "id": "colors-bias"}
260
+ {"label": "adjectives", "pattern": [{"LOWER": "brass"}], "id": "colors-bias"}
261
+ {"label": "adjectives", "pattern": [{"LOWER": "bronze"}], "id": "colors-bias"}
262
+ {"label": "adjectives", "pattern": [{"LOWER": "brown"}], "id": "colors-bias"}
263
+ {"label": "adjectives", "pattern": [{"LOWER": "burgundy"}], "id": "colors-bias"}
264
+ {"label": "adjectives", "pattern": [{"LOWER": "camel"}], "id": "colors-bias"}
265
+ {"label": "adjectives", "pattern": [{"LOWER": "caramel"}], "id": "colors-bias"}
266
+ {"label": "adjectives", "pattern": [{"LOWER": "canary"}], "id": "colors-bias"}
267
+ {"label": "adjectives", "pattern": [{"LOWER": "celeste"}], "id": "colors-bias"}
268
+ {"label": "adjectives", "pattern": [{"LOWER": "cerulean"}], "id": "colors-bias"}
269
+ {"label": "adjectives", "pattern": [{"LOWER": "champagne"}], "id": "colors-bias"}
270
+ {"label": "adjectives", "pattern": [{"LOWER": "charcoal"}], "id": "colors-bias"}
271
+ {"label": "adjectives", "pattern": [{"LOWER": "chartreuse"}], "id": "colors-bias"}
272
+ {"label": "adjectives", "pattern": [{"LOWER": "chestnut"}], "id": "colors-bias"}
273
+ {"label": "adjectives", "pattern": [{"LOWER": "chocolate"}], "id": "colors-bias"}
274
+ {"label": "adjectives", "pattern": [{"LOWER": "citron"}], "id": "colors-bias"}
275
+ {"label": "adjectives", "pattern": [{"LOWER": "claret"}], "id": "colors-bias"}
276
+ {"label": "adjectives", "pattern": [{"LOWER": "coal"}], "id": "colors-bias"}
277
+ {"label": "adjectives", "pattern": [{"LOWER": "cobalt"}], "id": "colors-bias"}
278
+ {"label": "adjectives", "pattern": [{"LOWER": "coffee"}], "id": "colors-bias"}
279
+ {"label": "adjectives", "pattern": [{"LOWER": "coral"}], "id": "colors-bias"}
280
+ {"label": "adjectives", "pattern": [{"LOWER": "corn"}], "id": "colors-bias"}
281
+ {"label": "adjectives", "pattern": [{"LOWER": "cream"}], "id": "colors-bias"}
282
+ {"label": "adjectives", "pattern": [{"LOWER": "crimson"}], "id": "colors-bias"}
283
+ {"label": "adjectives", "pattern": [{"LOWER": "cyan"}], "id": "colors-bias"}
284
+ {"label": "adjectives", "pattern": [{"LOWER": "denim"}], "id": "colors-bias"}
285
+ {"label": "adjectives", "pattern": [{"LOWER": "desert"}], "id": "colors-bias"}
286
+ {"label": "adjectives", "pattern": [{"LOWER": "ebony"}], "id": "colors-bias"}
287
+ {"label": "adjectives", "pattern": [{"LOWER": "ecru"}], "id": "colors-bias"}
288
+ {"label": "adjectives", "pattern": [{"LOWER": "emerald"}], "id": "colors-bias"}
289
+ {"label": "adjectives", "pattern": [{"LOWER": "feldspar"}], "id": "colors-bias"}
290
+ {"label": "adjectives", "pattern": [{"LOWER": "fuchsia"}], "id": "colors-bias"}
291
+ {"label": "adjectives", "pattern": [{"LOWER": "gold"}], "id": "colors-bias"}
292
+ {"label": "adjectives", "pattern": [{"LOWER": "gray"}], "id": "colors-bias"}
293
+ {"label": "adjectives", "pattern": [{"LOWER": "green"}], "id": "colors-bias"}
294
+ {"label": "adjectives", "pattern": [{"LOWER": "heather"}], "id": "colors-bias"}
295
+ {"label": "adjectives", "pattern": [{"LOWER": "indigo"}], "id": "colors-bias"}
296
+ {"label": "adjectives", "pattern": [{"LOWER": "ivory"}], "id": "colors-bias"}
297
+ {"label": "adjectives", "pattern": [{"LOWER": "jet"}], "id": "colors-bias"}
298
+ {"label": "adjectives", "pattern": [{"LOWER": "khaki"}], "id": "colors-bias"}
299
+ {"label": "adjectives", "pattern": [{"LOWER": "lime"}], "id": "colors-bias"}
300
+ {"label": "adjectives", "pattern": [{"LOWER": "magenta"}], "id": "colors-bias"}
301
+ {"label": "adjectives", "pattern": [{"LOWER": "maroon"}], "id": "colors-bias"}
302
+ {"label": "adjectives", "pattern": [{"LOWER": "mint"}], "id": "colors-bias"}
303
+ {"label": "adjectives", "pattern": [{"LOWER": "navy"}], "id": "colors-bias"}
304
+ {"label": "adjectives", "pattern": [{"LOWER": "olive"}], "id": "colors-bias"}
305
+ {"label": "adjectives", "pattern": [{"LOWER": "orange"}], "id": "colors-bias"}
306
+ {"label": "adjectives", "pattern": [{"LOWER": "pink"}], "id": "colors-bias"}
307
+ {"label": "adjectives", "pattern": [{"LOWER": "plum"}], "id": "colors-bias"}
308
+ {"label": "adjectives", "pattern": [{"LOWER": "purple"}], "id": "colors-bias"}
309
+ {"label": "adjectives", "pattern": [{"LOWER": "red"}], "id": "colors-bias"}
310
+ {"label": "adjectives", "pattern": [{"LOWER": "rust"}], "id": "colors-bias"}
311
+ {"label": "adjectives", "pattern": [{"LOWER": "salmon"}], "id": "colors-bias"}
312
+ {"label": "adjectives", "pattern": [{"LOWER": "sienna"}], "id": "colors-bias"}
313
+ {"label": "adjectives", "pattern": [{"LOWER": "silver"}], "id": "colors-bias"}
314
+ {"label": "adjectives", "pattern": [{"LOWER": "snow"}], "id": "colors-bias"}
315
+ {"label": "adjectives", "pattern": [{"LOWER": "steel"}], "id": "colors-bias"}
316
+ {"label": "adjectives", "pattern": [{"LOWER": "tan"}], "id": "colors-bias"}
317
+ {"label": "adjectives", "pattern": [{"LOWER": "teal"}], "id": "colors-bias"}
318
+ {"label": "adjectives", "pattern": [{"LOWER": "tomato"}], "id": "colors-bias"}
319
+ {"label": "adjectives", "pattern": [{"LOWER": "violet"}], "id": "colors-bias"}
320
+ {"label": "adjectives", "pattern": [{"LOWER": "white"}], "id": "colors-bias"}
321
+ {"label": "adjectives", "pattern": [{"LOWER": "yellow"}], "id": "colors-bias"}
322
+ {"label": "adjectives", "pattern": [{"LOWER": "bitter"}], "id": "taste-bias"}
323
+ {"label": "adjectives", "pattern": [{"LOWER": "chalky"}], "id": "taste-bias"}
324
+ {"label": "adjectives", "pattern": [{"LOWER": "chewy"}], "id": "taste-bias"}
325
+ {"label": "adjectives", "pattern": [{"LOWER": "creamy"}], "id": "taste-bias"}
326
+ {"label": "adjectives", "pattern": [{"LOWER": "crispy"}], "id": "taste-bias"}
327
+ {"label": "adjectives", "pattern": [{"LOWER": "crunchy"}], "id": "taste-bias"}
328
+ {"label": "adjectives", "pattern": [{"LOWER": "dry"}], "id": "taste-bias"}
329
+ {"label": "adjectives", "pattern": [{"LOWER": "greasy"}], "id": "taste-bias"}
330
+ {"label": "adjectives", "pattern": [{"LOWER": "gritty"}], "id": "taste-bias"}
331
+ {"label": "adjectives", "pattern": [{"LOWER": "mild"}], "id": "taste-bias"}
332
+ {"label": "adjectives", "pattern": [{"LOWER": "moist"}], "id": "taste-bias"}
333
+ {"label": "adjectives", "pattern": [{"LOWER": "oily"}], "id": "taste-bias"}
334
+ {"label": "adjectives", "pattern": [{"LOWER": "plain"}], "id": "taste-bias"}
335
+ {"label": "adjectives", "pattern": [{"LOWER": "salty"}], "id": "taste-bias"}
336
+ {"label": "adjectives", "pattern": [{"LOWER": "savory"}], "id": "taste-bias"}
337
+ {"label": "adjectives", "pattern": [{"LOWER": "sour"}], "id": "taste-bias"}
338
+ {"label": "adjectives", "pattern": [{"LOWER": "spicy"}], "id": "taste-bias"}
339
+ {"label": "adjectives", "pattern": [{"LOWER": "sweet"}], "id": "taste-bias"}
340
+ {"label": "adjectives", "pattern": [{"LOWER": "tangy"}], "id": "taste-bias"}
341
+ {"label": "adjectives", "pattern": [{"LOWER": "tart"}], "id": "taste-bias"}
342
+ {"label": "adjectives", "pattern": [{"LOWER": "zesty"}], "id": "taste-bias"}
343
+ {"label": "adjectives", "pattern": [{"LOWER": "all"}], "id": "quantity-bias"}
344
+ {"label": "adjectives", "pattern": [{"LOWER": "another"}], "id": "quantity-bias"}
345
+ {"label": "adjectives", "pattern": [{"LOWER": "each"}], "id": "quantity-bias"}
346
+ {"label": "adjectives", "pattern": [{"LOWER": "either"}], "id": "quantity-bias"}
347
+ {"label": "adjectives", "pattern": [{"LOWER": "every"}], "id": "quantity-bias"}
348
+ {"label": "adjectives", "pattern": [{"LOWER": "few"}], "id": "quantity-bias"}
349
+ {"label": "adjectives", "pattern": [{"LOWER": "many"}], "id": "quantity-bias"}
350
+ {"label": "adjectives", "pattern": [{"LOWER": "numerous"}], "id": "quantity-bias"}
351
+ {"label": "adjectives", "pattern": [{"LOWER": "one"}], "id": "quantity-bias"}
352
+ {"label": "adjectives", "pattern": [{"LOWER": "other"}], "id": "quantity-bias"}
353
+ {"label": "adjectives", "pattern": [{"LOWER": "several"}], "id": "quantity-bias"}
354
+ {"label": "adjectives", "pattern": [{"LOWER": "some"}], "id": "quantity-bias"}
355
+ {"label": "adjectives", "pattern": [{"LOWER": "average"}], "id": "size-bias"}
356
+ {"label": "adjectives", "pattern": [{"LOWER": "big"}], "id": "size-bias"}
357
+ {"label": "adjectives", "pattern": [{"LOWER": "broad"}], "id": "size-bias"}
358
+ {"label": "adjectives", "pattern": [{"LOWER": "flat"}], "id": "size-bias"}
359
+ {"label": "adjectives", "pattern": [{"LOWER": "giant"}], "id": "size-bias"}
360
+ {"label": "adjectives", "pattern": [{"LOWER": "huge"}], "id": "size-bias"}
361
+ {"label": "adjectives", "pattern": [{"LOWER": "humongous"}], "id": "size-bias"}
362
+ {"label": "adjectives", "pattern": [{"LOWER": "immense"}], "id": "size-bias"}
363
+ {"label": "adjectives", "pattern": [{"LOWER": "large"}], "id": "size-bias"}
364
+ {"label": "adjectives", "pattern": [{"LOWER": "little"}], "id": "size-bias"}
365
+ {"label": "adjectives", "pattern": [{"LOWER": "long"}], "id": "size-bias"}
366
+ {"label": "adjectives", "pattern": [{"LOWER": "massive"}], "id": "size-bias"}
367
+ {"label": "adjectives", "pattern": [{"LOWER": "medium"}], "id": "size-bias"}
368
+ {"label": "adjectives", "pattern": [{"LOWER": "miniature"}], "id": "size-bias"}
369
+ {"label": "adjectives", "pattern": [{"LOWER": "short"}], "id": "size-bias"}
370
+ {"label": "adjectives", "pattern": [{"LOWER": "small"}], "id": "size-bias"}
371
+ {"label": "adjectives", "pattern": [{"LOWER": "tall"}], "id": "size-bias"}
372
+ {"label": "adjectives", "pattern": [{"LOWER": "tiny"}], "id": "size-bias"}
373
+ {"label": "adjectives", "pattern": [{"LOWER": "wide"}], "id": "size-bias"}
374
+ {"label": "adjectives", "pattern": [{"LOWER": "absolute"}], "id": "algorithms-bias"}
375
+ {"label": "adjectives", "pattern": [{"LOWER": "abstract"}], "id": "algorithms-bias"}
376
+ {"label": "adjectives", "pattern": [{"LOWER": "active"}], "id": "algorithms-bias"}
377
+ {"label": "adjectives", "pattern": [{"LOWER": "acyclic"}], "id": "algorithms-bias"}
378
+ {"label": "adjectives", "pattern": [{"LOWER": "adaptive"}], "id": "algorithms-bias"}
379
+ {"label": "adjectives", "pattern": [{"LOWER": "amortized"}], "id": "algorithms-bias"}
380
+ {"label": "adjectives", "pattern": [{"LOWER": "approximate"}], "id": "algorithms-bias"}
381
+ {"label": "adjectives", "pattern": [{"LOWER": "ascent"}], "id": "algorithms-bias"}
382
+ {"label": "adjectives", "pattern": [{"LOWER": "associative"}], "id": "algorithms-bias"}
383
+ {"label": "adjectives", "pattern": [{"LOWER": "asymptotic"}], "id": "algorithms-bias"}
384
+ {"label": "adjectives", "pattern": [{"LOWER": "augmenting"}], "id": "algorithms-bias"}
385
+ {"label": "adjectives", "pattern": [{"LOWER": "average"}], "id": "algorithms-bias"}
386
+ {"label": "adjectives", "pattern": [{"LOWER": "balanced"}], "id": "algorithms-bias"}
387
+ {"label": "adjectives", "pattern": [{"LOWER": "best"}], "id": "algorithms-bias"}
388
+ {"label": "adjectives", "pattern": [{"LOWER": "binary"}], "id": "algorithms-bias"}
389
+ {"label": "adjectives", "pattern": [{"LOWER": "bipartite"}], "id": "algorithms-bias"}
390
+ {"label": "adjectives", "pattern": [{"LOWER": "blocking"}], "id": "algorithms-bias"}
391
+ {"label": "adjectives", "pattern": [{"LOWER": "boolean"}], "id": "algorithms-bias"}
392
+ {"label": "adjectives", "pattern": [{"LOWER": "bounded"}], "id": "algorithms-bias"}
393
+ {"label": "adjectives", "pattern": [{"LOWER": "brute force"}], "id": "algorithms-bias"}
394
+ {"label": "adjectives", "pattern": [{"LOWER": "commutative"}], "id": "algorithms-bias"}
395
+ {"label": "adjectives", "pattern": [{"LOWER": "complete"}], "id": "algorithms-bias"}
396
+ {"label": "adjectives", "pattern": [{"LOWER": "concave"}], "id": "algorithms-bias"}
397
+ {"label": "adjectives", "pattern": [{"LOWER": "concurrent"}], "id": "algorithms-bias"}
398
+ {"label": "adjectives", "pattern": [{"LOWER": "connected"}], "id": "algorithms-bias"}
399
+ {"label": "adjectives", "pattern": [{"LOWER": "constant"}], "id": "algorithms-bias"}
400
+ {"label": "adjectives", "pattern": [{"LOWER": "counting"}], "id": "algorithms-bias"}
401
+ {"label": "adjectives", "pattern": [{"LOWER": "covering"}], "id": "algorithms-bias"}
402
+ {"label": "adjectives", "pattern": [{"LOWER": "cyclic"}], "id": "algorithms-bias"}
403
+ {"label": "adjectives", "pattern": [{"LOWER": "decidable"}], "id": "algorithms-bias"}
404
+ {"label": "adjectives", "pattern": [{"LOWER": "descent"}], "id": "algorithms-bias"}
405
+ {"label": "adjectives", "pattern": [{"LOWER": "deterministic"}], "id": "algorithms-bias"}
406
+ {"label": "adjectives", "pattern": [{"LOWER": "dichotomic"}], "id": "algorithms-bias"}
407
+ {"label": "adjectives", "pattern": [{"LOWER": "dyadic"}], "id": "algorithms-bias"}
408
+ {"label": "adjectives", "pattern": [{"LOWER": "dynamic"}], "id": "algorithms-bias"}
409
+ {"label": "adjectives", "pattern": [{"LOWER": "exact"}], "id": "algorithms-bias"}
410
+ {"label": "adjectives", "pattern": [{"LOWER": "exhaustive"}], "id": "algorithms-bias"}
411
+ {"label": "adjectives", "pattern": [{"LOWER": "exponential"}], "id": "algorithms-bias"}
412
+ {"label": "adjectives", "pattern": [{"LOWER": "extended"}], "id": "algorithms-bias"}
413
+ {"label": "adjectives", "pattern": [{"LOWER": "external"}], "id": "algorithms-bias"}
414
+ {"label": "adjectives", "pattern": [{"LOWER": "extremal"}], "id": "algorithms-bias"}
415
+ {"label": "adjectives", "pattern": [{"LOWER": "factorial"}], "id": "algorithms-bias"}
416
+ {"label": "adjectives", "pattern": [{"LOWER": "feasible"}], "id": "algorithms-bias"}
417
+ {"label": "adjectives", "pattern": [{"LOWER": "finite"}], "id": "algorithms-bias"}
418
+ {"label": "adjectives", "pattern": [{"LOWER": "fixed"}], "id": "algorithms-bias"}
419
+ {"label": "adjectives", "pattern": [{"LOWER": "formal"}], "id": "algorithms-bias"}
420
+ {"label": "adjectives", "pattern": [{"LOWER": "forward"}], "id": "algorithms-bias"}
421
+ {"label": "adjectives", "pattern": [{"LOWER": "free"}], "id": "algorithms-bias"}
422
+ {"label": "adjectives", "pattern": [{"LOWER": "greedy"}], "id": "algorithms-bias"}
423
+ {"label": "adjectives", "pattern": [{"LOWER": "hidden"}], "id": "algorithms-bias"}
424
+ {"label": "adjectives", "pattern": [{"LOWER": "inclusive"}], "id": "algorithms-bias"}
425
+ {"label": "adjectives", "pattern": [{"LOWER": "internal"}], "id": "algorithms-bias"}
426
+ {"label": "adjectives", "pattern": [{"LOWER": "intractable"}], "id": "algorithms-bias"}
427
+ {"label": "adjectives", "pattern": [{"LOWER": "inverse"}], "id": "algorithms-bias"}
428
+ {"label": "adjectives", "pattern": [{"LOWER": "inverted"}], "id": "algorithms-bias"}
429
+ {"label": "adjectives", "pattern": [{"LOWER": "isomorphic"}], "id": "algorithms-bias"}
430
+ {"label": "adjectives", "pattern": [{"LOWER": "linear"}], "id": "algorithms-bias"}
431
+ {"label": "adjectives", "pattern": [{"LOWER": "local"}], "id": "algorithms-bias"}
432
+ {"label": "adjectives", "pattern": [{"LOWER": "lower"}], "id": "algorithms-bias"}
433
+ {"label": "adjectives", "pattern": [{"LOWER": "matching"}], "id": "algorithms-bias"}
434
+ {"label": "adjectives", "pattern": [{"LOWER": "maximum"}], "id": "algorithms-bias"}
435
+ {"label": "adjectives", "pattern": [{"LOWER": "mean"}], "id": "algorithms-bias"}
436
+ {"label": "adjectives", "pattern": [{"LOWER": "median"}], "id": "algorithms-bias"}
437
+ {"label": "adjectives", "pattern": [{"LOWER": "minimum"}], "id": "algorithms-bias"}
438
+ {"label": "adjectives", "pattern": [{"LOWER": "mode"}], "id": "algorithms-bias"}
439
+ {"label": "adjectives", "pattern": [{"LOWER": "naive"}], "id": "algorithms-bias"}
440
+ {"label": "adjectives", "pattern": [{"LOWER": "nearest"}], "id": "algorithms-bias"}
441
+ {"label": "adjectives", "pattern": [{"LOWER": "nondeterministic"}], "id": "algorithms-bias"}
442
+ {"label": "adjectives", "pattern": [{"LOWER": "null"}], "id": "algorithms-bias"}
443
+ {"label": "adjectives", "pattern": [{"LOWER": "nullary"}], "id": "algorithms-bias"}
444
+ {"label": "adjectives", "pattern": [{"LOWER": "objective"}], "id": "algorithms-bias"}
445
+ {"label": "adjectives", "pattern": [{"LOWER": "offline"}], "id": "algorithms-bias"}
446
+ {"label": "adjectives", "pattern": [{"LOWER": "online"}], "id": "algorithms-bias"}
447
+ {"label": "adjectives", "pattern": [{"LOWER": "optimal"}], "id": "algorithms-bias"}
448
+ {"label": "adjectives", "pattern": [{"LOWER": "ordered"}], "id": "algorithms-bias"}
449
+ {"label": "adjectives", "pattern": [{"LOWER": "oriented"}], "id": "algorithms-bias"}
450
+ {"label": "adjectives", "pattern": [{"LOWER": "orthogonal"}], "id": "algorithms-bias"}
451
+ {"label": "adjectives", "pattern": [{"LOWER": "oscillating"}], "id": "algorithms-bias"}
452
+ {"label": "adjectives", "pattern": [{"LOWER": "parallel"}], "id": "algorithms-bias"}
453
+ {"label": "adjectives", "pattern": [{"LOWER": "partial"}], "id": "algorithms-bias"}
454
+ {"label": "adjectives", "pattern": [{"LOWER": "perfect"}], "id": "algorithms-bias"}
455
+ {"label": "adjectives", "pattern": [{"LOWER": "persistent"}], "id": "algorithms-bias"}
456
+ {"label": "adjectives", "pattern": [{"LOWER": "planar"}], "id": "algorithms-bias"}
457
+ {"label": "adjectives", "pattern": [{"LOWER": "polynomial"}], "id": "algorithms-bias"}
458
+ {"label": "adjectives", "pattern": [{"LOWER": "proper"}], "id": "algorithms-bias"}
459
+ {"label": "adjectives", "pattern": [{"LOWER": "quadratic"}], "id": "algorithms-bias"}
460
+ {"label": "adjectives", "pattern": [{"LOWER": "ragged"}], "id": "algorithms-bias"}
461
+ {"label": "adjectives", "pattern": [{"LOWER": "random"}], "id": "algorithms-bias"}
462
+ {"label": "adjectives", "pattern": [{"LOWER": "randomized"}], "id": "algorithms-bias"}
463
+ {"label": "adjectives", "pattern": [{"LOWER": "rectilinear"}], "id": "algorithms-bias"}
464
+ {"label": "adjectives", "pattern": [{"LOWER": "recursive"}], "id": "algorithms-bias"}
465
+ {"label": "adjectives", "pattern": [{"LOWER": "reduced"}], "id": "algorithms-bias"}
466
+ {"label": "adjectives", "pattern": [{"LOWER": "relaxed"}], "id": "algorithms-bias"}
467
+ {"label": "adjectives", "pattern": [{"LOWER": "shortest"}], "id": "algorithms-bias"}
468
+ {"label": "adjectives", "pattern": [{"LOWER": "simple"}], "id": "algorithms-bias"}
469
+ {"label": "adjectives", "pattern": [{"LOWER": "sparse"}], "id": "algorithms-bias"}
470
+ {"label": "adjectives", "pattern": [{"LOWER": "spatial"}], "id": "algorithms-bias"}
471
+ {"label": "adjectives", "pattern": [{"LOWER": "square"}], "id": "algorithms-bias"}
472
+ {"label": "adjectives", "pattern": [{"LOWER": "stable"}], "id": "algorithms-bias"}
473
+ {"label": "adjectives", "pattern": [{"LOWER": "swarm"}], "id": "algorithms-bias"}
474
+ {"label": "adjectives", "pattern": [{"LOWER": "symmetric"}], "id": "algorithms-bias"}
475
+ {"label": "adjectives", "pattern": [{"LOWER": "terminal"}], "id": "algorithms-bias"}
476
+ {"label": "adjectives", "pattern": [{"LOWER": "ternary"}], "id": "algorithms-bias"}
477
+ {"label": "adjectives", "pattern": [{"LOWER": "threaded"}], "id": "algorithms-bias"}
478
+ {"label": "adjectives", "pattern": [{"LOWER": "tractable"}], "id": "algorithms-bias"}
479
+ {"label": "adjectives", "pattern": [{"LOWER": "unary"}], "id": "algorithms-bias"}
480
+ {"label": "adjectives", "pattern": [{"LOWER": "undecidable"}], "id": "algorithms-bias"}
481
+ {"label": "adjectives", "pattern": [{"LOWER": "undirected"}], "id": "algorithms-bias"}
482
+ {"label": "adjectives", "pattern": [{"LOWER": "uniform"}], "id": "algorithms-bias"}
483
+ {"label": "adjectives", "pattern": [{"LOWER": "universal"}], "id": "algorithms-bias"}
484
+ {"label": "adjectives", "pattern": [{"LOWER": "unsolvable"}], "id": "algorithms-bias"}
485
+ {"label": "adjectives", "pattern": [{"LOWER": "unsorted"}], "id": "algorithms-bias"}
486
+ {"label": "adjectives", "pattern": [{"LOWER": "visible"}], "id": "algorithms-bias"}
487
+ {"label": "adjectives", "pattern": [{"LOWER": "weighted"}], "id": "algorithms-bias"}
488
+ {"label": "adjectives", "pattern": [{"LOWER": "acute"}], "id": "geometry-bias"}
489
+ {"label": "adjectives", "pattern": [{"LOWER": "adjacent"}], "id": "geometry-bias"}
490
+ {"label": "adjectives", "pattern": [{"LOWER": "alternate"}], "id": "geometry-bias"}
491
+ {"label": "adjectives", "pattern": [{"LOWER": "central"}], "id": "geometry-bias"}
492
+ {"label": "adjectives", "pattern": [{"LOWER": "coincident"}], "id": "geometry-bias"}
493
+ {"label": "adjectives", "pattern": [{"LOWER": "collinear"}], "id": "geometry-bias"}
494
+ {"label": "adjectives", "pattern": [{"LOWER": "composite"}], "id": "geometry-bias"}
495
+ {"label": "adjectives", "pattern": [{"LOWER": "concave"}], "id": "geometry-bias"}
496
+ {"label": "adjectives", "pattern": [{"LOWER": "concentric"}], "id": "geometry-bias"}
497
+ {"label": "adjectives", "pattern": [{"LOWER": "congruent"}], "id": "geometry-bias"}
498
+ {"label": "adjectives", "pattern": [{"LOWER": "convex"}], "id": "geometry-bias"}
499
+ {"label": "adjectives", "pattern": [{"LOWER": "coplanar"}], "id": "geometry-bias"}
500
+ {"label": "adjectives", "pattern": [{"LOWER": "diagonal"}], "id": "geometry-bias"}
501
+ {"label": "adjectives", "pattern": [{"LOWER": "distinct"}], "id": "geometry-bias"}
502
+ {"label": "adjectives", "pattern": [{"LOWER": "equidistant"}], "id": "geometry-bias"}
503
+ {"label": "adjectives", "pattern": [{"LOWER": "equilateral"}], "id": "geometry-bias"}
504
+ {"label": "adjectives", "pattern": [{"LOWER": "fixed"}], "id": "geometry-bias"}
505
+ {"label": "adjectives", "pattern": [{"LOWER": "horizontal"}], "id": "geometry-bias"}
506
+ {"label": "adjectives", "pattern": [{"LOWER": "inscribed"}], "id": "geometry-bias"}
507
+ {"label": "adjectives", "pattern": [{"LOWER": "interior"}], "id": "geometry-bias"}
508
+ {"label": "adjectives", "pattern": [{"LOWER": "irregular"}], "id": "geometry-bias"}
509
+ {"label": "adjectives", "pattern": [{"LOWER": "linear"}], "id": "geometry-bias"}
510
+ {"label": "adjectives", "pattern": [{"LOWER": "oblique"}], "id": "geometry-bias"}
511
+ {"label": "adjectives", "pattern": [{"LOWER": "obtuse"}], "id": "geometry-bias"}
512
+ {"label": "adjectives", "pattern": [{"LOWER": "parallel"}], "id": "geometry-bias"}
513
+ {"label": "adjectives", "pattern": [{"LOWER": "perpendicular"}], "id": "geometry-bias"}
514
+ {"label": "adjectives", "pattern": [{"LOWER": "regular"}], "id": "geometry-bias"}
515
+ {"label": "adjectives", "pattern": [{"LOWER": "right"}], "id": "geometry-bias"}
516
+ {"label": "adjectives", "pattern": [{"LOWER": "similar"}], "id": "geometry-bias"}
517
+ {"label": "adjectives", "pattern": [{"LOWER": "vertical"}], "id": "geometry-bias"}
518
+ {"label": "adjectives", "pattern": [{"LOWER": "brass"}], "id": "materials-bias"}
519
+ {"label": "adjectives", "pattern": [{"LOWER": "chalky"}], "id": "materials-bias"}
520
+ {"label": "adjectives", "pattern": [{"LOWER": "concrete"}], "id": "materials-bias"}
521
+ {"label": "adjectives", "pattern": [{"LOWER": "felt"}], "id": "materials-bias"}
522
+ {"label": "adjectives", "pattern": [{"LOWER": "gilded"}], "id": "materials-bias"}
523
+ {"label": "adjectives", "pattern": [{"LOWER": "glass"}], "id": "materials-bias"}
524
+ {"label": "adjectives", "pattern": [{"LOWER": "golden"}], "id": "materials-bias"}
525
+ {"label": "adjectives", "pattern": [{"LOWER": "iron"}], "id": "materials-bias"}
526
+ {"label": "adjectives", "pattern": [{"LOWER": "leather"}], "id": "materials-bias"}
527
+ {"label": "adjectives", "pattern": [{"LOWER": "metal"}], "id": "materials-bias"}
528
+ {"label": "adjectives", "pattern": [{"LOWER": "metallic"}], "id": "materials-bias"}
529
+ {"label": "adjectives", "pattern": [{"LOWER": "oily"}], "id": "materials-bias"}
530
+ {"label": "adjectives", "pattern": [{"LOWER": "paper"}], "id": "materials-bias"}
531
+ {"label": "adjectives", "pattern": [{"LOWER": "plastic"}], "id": "materials-bias"}
532
+ {"label": "adjectives", "pattern": [{"LOWER": "silver"}], "id": "materials-bias"}
533
+ {"label": "adjectives", "pattern": [{"LOWER": "steel"}], "id": "materials-bias"}
534
+ {"label": "adjectives", "pattern": [{"LOWER": "stone"}], "id": "materials-bias"}
535
+ {"label": "adjectives", "pattern": [{"LOWER": "watery"}], "id": "materials-bias"}
536
+ {"label": "adjectives", "pattern": [{"LOWER": "wicker"}], "id": "materials-bias"}
537
+ {"label": "adjectives", "pattern": [{"LOWER": "wood"}], "id": "materials-bias"}
538
+ {"label": "adjectives", "pattern": [{"LOWER": "wooden"}], "id": "materials-bias"}
539
+ {"label": "adjectives", "pattern": [{"LOWER": "woolen"}], "id": "materials-bias"}
540
+ {"label": "adjectives", "pattern": [{"LOWER": "beveled"}], "id": "construction-bias"}
541
+ {"label": "adjectives", "pattern": [{"LOWER": "chamfered"}], "id": "construction-bias"}
542
+ {"label": "adjectives", "pattern": [{"LOWER": "coped"}], "id": "construction-bias"}
543
+ {"label": "adjectives", "pattern": [{"LOWER": "flashed"}], "id": "construction-bias"}
544
+ {"label": "adjectives", "pattern": [{"LOWER": "flush"}], "id": "construction-bias"}
545
+ {"label": "adjectives", "pattern": [{"LOWER": "inflammable"}], "id": "construction-bias"}
546
+ {"label": "adjectives", "pattern": [{"LOWER": "insulated"}], "id": "construction-bias"}
547
+ {"label": "adjectives", "pattern": [{"LOWER": "isometric"}], "id": "construction-bias"}
548
+ {"label": "adjectives", "pattern": [{"LOWER": "joint"}], "id": "construction-bias"}
549
+ {"label": "adjectives", "pattern": [{"LOWER": "knurled"}], "id": "construction-bias"}
550
+ {"label": "adjectives", "pattern": [{"LOWER": "laminated"}], "id": "construction-bias"}
551
+ {"label": "adjectives", "pattern": [{"LOWER": "level"}], "id": "construction-bias"}
552
+ {"label": "adjectives", "pattern": [{"LOWER": "plumb"}], "id": "construction-bias"}
553
+ {"label": "adjectives", "pattern": [{"LOWER": "radial"}], "id": "construction-bias"}
554
+ {"label": "adjectives", "pattern": [{"LOWER": "rigid"}], "id": "construction-bias"}
555
+ {"label": "adjectives", "pattern": [{"LOWER": "soluble"}], "id": "construction-bias"}
556
+ {"label": "adjectives", "pattern": [{"LOWER": "tempered"}], "id": "construction-bias"}
557
+ {"label": "adjectives", "pattern": [{"LOWER": "warped"}], "id": "construction-bias"}
558
+ {"label": "adjectives", "pattern": [{"LOWER": "adagio"}], "id": "music_theory-bias"}
559
+ {"label": "adjectives", "pattern": [{"LOWER": "allegro"}], "id": "music_theory-bias"}
560
+ {"label": "adjectives", "pattern": [{"LOWER": "andante"}], "id": "music_theory-bias"}
561
+ {"label": "adjectives", "pattern": [{"LOWER": "animato"}], "id": "music_theory-bias"}
562
+ {"label": "adjectives", "pattern": [{"LOWER": "espressivo"}], "id": "music_theory-bias"}
563
+ {"label": "adjectives", "pattern": [{"LOWER": "grandioso"}], "id": "music_theory-bias"}
564
+ {"label": "adjectives", "pattern": [{"LOWER": "grave"}], "id": "music_theory-bias"}
565
+ {"label": "adjectives", "pattern": [{"LOWER": "largo"}], "id": "music_theory-bias"}
566
+ {"label": "adjectives", "pattern": [{"LOWER": "legato"}], "id": "music_theory-bias"}
567
+ {"label": "adjectives", "pattern": [{"LOWER": "libretto"}], "id": "music_theory-bias"}
568
+ {"label": "adjectives", "pattern": [{"LOWER": "moderato"}], "id": "music_theory-bias"}
569
+ {"label": "adjectives", "pattern": [{"LOWER": "molto"}], "id": "music_theory-bias"}
570
+ {"label": "adjectives", "pattern": [{"LOWER": "pizzicato"}], "id": "music_theory-bias"}
571
+ {"label": "adjectives", "pattern": [{"LOWER": "presto"}], "id": "music_theory-bias"}
572
+ {"label": "adjectives", "pattern": [{"LOWER": "staccato"}], "id": "music_theory-bias"}
573
+ {"label": "adjectives", "pattern": [{"LOWER": "vibrato"}], "id": "music_theory-bias"}
574
+ {"label": "adjectives", "pattern": [{"LOWER": "blazing"}], "id": "appearance-bias"}
575
+ {"label": "adjectives", "pattern": [{"LOWER": "bright"}], "id": "appearance-bias"}
576
+ {"label": "adjectives", "pattern": [{"LOWER": "brilliant"}], "id": "appearance-bias"}
577
+ {"label": "adjectives", "pattern": [{"LOWER": "burning"}], "id": "appearance-bias"}
578
+ {"label": "adjectives", "pattern": [{"LOWER": "clean"}], "id": "appearance-bias"}
579
+ {"label": "adjectives", "pattern": [{"LOWER": "colorful"}], "id": "appearance-bias"}
580
+ {"label": "adjectives", "pattern": [{"LOWER": "dark"}], "id": "appearance-bias"}
581
+ {"label": "adjectives", "pattern": [{"LOWER": "drab"}], "id": "appearance-bias"}
582
+ {"label": "adjectives", "pattern": [{"LOWER": "dull"}], "id": "appearance-bias"}
583
+ {"label": "adjectives", "pattern": [{"LOWER": "faded"}], "id": "appearance-bias"}
584
+ {"label": "adjectives", "pattern": [{"LOWER": "flat"}], "id": "appearance-bias"}
585
+ {"label": "adjectives", "pattern": [{"LOWER": "glossy"}], "id": "appearance-bias"}
586
+ {"label": "adjectives", "pattern": [{"LOWER": "glowing"}], "id": "appearance-bias"}
587
+ {"label": "adjectives", "pattern": [{"LOWER": "light"}], "id": "appearance-bias"}
588
+ {"label": "adjectives", "pattern": [{"LOWER": "matte"}], "id": "appearance-bias"}
589
+ {"label": "adjectives", "pattern": [{"LOWER": "muted"}], "id": "appearance-bias"}
590
+ {"label": "adjectives", "pattern": [{"LOWER": "pale"}], "id": "appearance-bias"}
591
+ {"label": "adjectives", "pattern": [{"LOWER": "pallid"}], "id": "appearance-bias"}
592
+ {"label": "adjectives", "pattern": [{"LOWER": "radiant"}], "id": "appearance-bias"}
593
+ {"label": "adjectives", "pattern": [{"LOWER": "shiny"}], "id": "appearance-bias"}
594
+ {"label": "adjectives", "pattern": [{"LOWER": "sleek"}], "id": "appearance-bias"}
595
+ {"label": "adjectives", "pattern": [{"LOWER": "sunny"}], "id": "appearance-bias"}
596
+ {"label": "adjectives", "pattern": [{"LOWER": "vibrant"}], "id": "appearance-bias"}
597
+ {"label": "adjectives", "pattern": [{"LOWER": "vivid"}], "id": "appearance-bias"}
598
+ {"label": "adjectives", "pattern": [{"LOWER": "wan"}], "id": "appearance-bias"}
599
+ {"label": "adjectives", "pattern": [{"LOWER": "weathered"}], "id": "appearance-bias"}
600
+ {"label": "adjectives", "pattern": [{"LOWER": "worn"}], "id": "appearance-bias"}
601
+ {"label": "adjectives", "pattern": [{"LOWER": "descriptive"}], "id": "linguistics-bias"}
602
+ {"label": "adjectives", "pattern": [{"LOWER": "diachronic"}], "id": "linguistics-bias"}
603
+ {"label": "adjectives", "pattern": [{"LOWER": "figurative"}], "id": "linguistics-bias"}
604
+ {"label": "adjectives", "pattern": [{"LOWER": "generative"}], "id": "linguistics-bias"}
605
+ {"label": "adjectives", "pattern": [{"LOWER": "marked"}], "id": "linguistics-bias"}
606
+ {"label": "adjectives", "pattern": [{"LOWER": "regular"}], "id": "linguistics-bias"}
607
+ {"label": "adjectives", "pattern": [{"LOWER": "synchronic"}], "id": "linguistics-bias"}
608
+ {"label": "adjectives", "pattern": [{"LOWER": "taxonomic"}], "id": "linguistics-bias"}
609
+ {"label": "adjectives", "pattern": [{"LOWER": "unproductive"}], "id": "linguistics-bias"}
610
+ {"label": "adjectives", "pattern": [{"LOWER": "afraid"}], "id": "emotions-bias"}
611
+ {"label": "adjectives", "pattern": [{"LOWER": "angry"}], "id": "emotions-bias"}
612
+ {"label": "adjectives", "pattern": [{"LOWER": "calm"}], "id": "emotions-bias"}
613
+ {"label": "adjectives", "pattern": [{"LOWER": "cheerful"}], "id": "emotions-bias"}
614
+ {"label": "adjectives", "pattern": [{"LOWER": "cold"}], "id": "emotions-bias"}
615
+ {"label": "adjectives", "pattern": [{"LOWER": "crabby"}], "id": "emotions-bias"}
616
+ {"label": "adjectives", "pattern": [{"LOWER": "crazy"}], "id": "emotions-bias"}
617
+ {"label": "adjectives", "pattern": [{"LOWER": "cross"}], "id": "emotions-bias"}
618
+ {"label": "adjectives", "pattern": [{"LOWER": "excited"}], "id": "emotions-bias"}
619
+ {"label": "adjectives", "pattern": [{"LOWER": "frigid"}], "id": "emotions-bias"}
620
+ {"label": "adjectives", "pattern": [{"LOWER": "furious"}], "id": "emotions-bias"}
621
+ {"label": "adjectives", "pattern": [{"LOWER": "glad"}], "id": "emotions-bias"}
622
+ {"label": "adjectives", "pattern": [{"LOWER": "glum"}], "id": "emotions-bias"}
623
+ {"label": "adjectives", "pattern": [{"LOWER": "happy"}], "id": "emotions-bias"}
624
+ {"label": "adjectives", "pattern": [{"LOWER": "icy"}], "id": "emotions-bias"}
625
+ {"label": "adjectives", "pattern": [{"LOWER": "jolly"}], "id": "emotions-bias"}
626
+ {"label": "adjectives", "pattern": [{"LOWER": "jovial"}], "id": "emotions-bias"}
627
+ {"label": "adjectives", "pattern": [{"LOWER": "kind"}], "id": "emotions-bias"}
628
+ {"label": "adjectives", "pattern": [{"LOWER": "lively"}], "id": "emotions-bias"}
629
+ {"label": "adjectives", "pattern": [{"LOWER": "livid"}], "id": "emotions-bias"}
630
+ {"label": "adjectives", "pattern": [{"LOWER": "mad"}], "id": "emotions-bias"}
631
+ {"label": "adjectives", "pattern": [{"LOWER": "ornery"}], "id": "emotions-bias"}
632
+ {"label": "adjectives", "pattern": [{"LOWER": "rosy"}], "id": "emotions-bias"}
633
+ {"label": "adjectives", "pattern": [{"LOWER": "sad"}], "id": "emotions-bias"}
634
+ {"label": "adjectives", "pattern": [{"LOWER": "scared"}], "id": "emotions-bias"}
635
+ {"label": "adjectives", "pattern": [{"LOWER": "seething"}], "id": "emotions-bias"}
636
+ {"label": "adjectives", "pattern": [{"LOWER": "shy"}], "id": "emotions-bias"}
637
+ {"label": "adjectives", "pattern": [{"LOWER": "sunny"}], "id": "emotions-bias"}
638
+ {"label": "adjectives", "pattern": [{"LOWER": "tense"}], "id": "emotions-bias"}
639
+ {"label": "adjectives", "pattern": [{"LOWER": "tranquil"}], "id": "emotions-bias"}
640
+ {"label": "adjectives", "pattern": [{"LOWER": "upbeat"}], "id": "emotions-bias"}
641
+ {"label": "adjectives", "pattern": [{"LOWER": "wary"}], "id": "emotions-bias"}
642
+ {"label": "adjectives", "pattern": [{"LOWER": "weary"}], "id": "emotions-bias"}
643
+ {"label": "adjectives", "pattern": [{"LOWER": "worried"}], "id": "emotions-bias"}
644
+ {"label": "adjectives", "pattern": [{"LOWER": "advanced"}], "id": "age-bias"}
645
+ {"label": "adjectives", "pattern": [{"LOWER": "aged"}], "id": "age-bias"}
646
+ {"label": "adjectives", "pattern": [{"LOWER": "ancient"}], "id": "age-bias"}
647
+ {"label": "adjectives", "pattern": [{"LOWER": "antique"}], "id": "age-bias"}
648
+ {"label": "adjectives", "pattern": [{"LOWER": "archaic"}], "id": "age-bias"}
649
+ {"label": "adjectives", "pattern": [{"LOWER": "contemporary"}], "id": "age-bias"}
650
+ {"label": "adjectives", "pattern": [{"LOWER": "current"}], "id": "age-bias"}
651
+ {"label": "adjectives", "pattern": [{"LOWER": "frayed"}], "id": "age-bias"}
652
+ {"label": "adjectives", "pattern": [{"LOWER": "fresh"}], "id": "age-bias"}
653
+ {"label": "adjectives", "pattern": [{"LOWER": "grizzled"}], "id": "age-bias"}
654
+ {"label": "adjectives", "pattern": [{"LOWER": "hoary"}], "id": "age-bias"}
655
+ {"label": "adjectives", "pattern": [{"LOWER": "immature"}], "id": "age-bias"}
656
+ {"label": "adjectives", "pattern": [{"LOWER": "juvenile"}], "id": "age-bias"}
657
+ {"label": "adjectives", "pattern": [{"LOWER": "mature"}], "id": "age-bias"}
658
+ {"label": "adjectives", "pattern": [{"LOWER": "modern"}], "id": "age-bias"}
659
+ {"label": "adjectives", "pattern": [{"LOWER": "new"}], "id": "age-bias"}
660
+ {"label": "adjectives", "pattern": [{"LOWER": "novel"}], "id": "age-bias"}
661
+ {"label": "adjectives", "pattern": [{"LOWER": "obsolete"}], "id": "age-bias"}
662
+ {"label": "adjectives", "pattern": [{"LOWER": "old"}], "id": "age-bias"}
663
+ {"label": "adjectives", "pattern": [{"LOWER": "primordial"}], "id": "age-bias"}
664
+ {"label": "adjectives", "pattern": [{"LOWER": "ragged"}], "id": "age-bias"}
665
+ {"label": "adjectives", "pattern": [{"LOWER": "raw"}], "id": "age-bias"}
666
+ {"label": "adjectives", "pattern": [{"LOWER": "recent"}], "id": "age-bias"}
667
+ {"label": "adjectives", "pattern": [{"LOWER": "senile"}], "id": "age-bias"}
668
+ {"label": "adjectives", "pattern": [{"LOWER": "shabby"}], "id": "age-bias"}
669
+ {"label": "adjectives", "pattern": [{"LOWER": "stale"}], "id": "age-bias"}
670
+ {"label": "adjectives", "pattern": [{"LOWER": "tattered"}], "id": "age-bias"}
671
+ {"label": "adjectives", "pattern": [{"LOWER": "threadbare"}], "id": "age-bias"}
672
+ {"label": "adjectives", "pattern": [{"LOWER": "trite"}], "id": "age-bias"}
673
+ {"label": "adjectives", "pattern": [{"LOWER": "vintage"}], "id": "age-bias"}
674
+ {"label": "adjectives", "pattern": [{"LOWER": "worn"}], "id": "age-bias"}
675
+ {"label": "adjectives", "pattern": [{"LOWER": "young"}], "id": "age-bias"}
676
+ {"label": "adjectives", "pattern": [{"LOWER": "accepting"}], "id": "character-bias"}
677
+ {"label": "adjectives", "pattern": [{"LOWER": "adventurous"}], "id": "character-bias"}
678
+ {"label": "adjectives", "pattern": [{"LOWER": "affable"}], "id": "character-bias"}
679
+ {"label": "adjectives", "pattern": [{"LOWER": "ambitious"}], "id": "character-bias"}
680
+ {"label": "adjectives", "pattern": [{"LOWER": "amiable"}], "id": "character-bias"}
681
+ {"label": "adjectives", "pattern": [{"LOWER": "amicable"}], "id": "character-bias"}
682
+ {"label": "adjectives", "pattern": [{"LOWER": "annoying"}], "id": "character-bias"}
683
+ {"label": "adjectives", "pattern": [{"LOWER": "bold"}], "id": "character-bias"}
684
+ {"label": "adjectives", "pattern": [{"LOWER": "brave"}], "id": "character-bias"}
685
+ {"label": "adjectives", "pattern": [{"LOWER": "bright"}], "id": "character-bias"}
686
+ {"label": "adjectives", "pattern": [{"LOWER": "brutal"}], "id": "character-bias"}
687
+ {"label": "adjectives", "pattern": [{"LOWER": "brute"}], "id": "character-bias"}
688
+ {"label": "adjectives", "pattern": [{"LOWER": "callous"}], "id": "character-bias"}
689
+ {"label": "adjectives", "pattern": [{"LOWER": "calm"}], "id": "character-bias"}
690
+ {"label": "adjectives", "pattern": [{"LOWER": "careful"}], "id": "character-bias"}
691
+ {"label": "adjectives", "pattern": [{"LOWER": "cautious"}], "id": "character-bias"}
692
+ {"label": "adjectives", "pattern": [{"LOWER": "charitable"}], "id": "character-bias"}
693
+ {"label": "adjectives", "pattern": [{"LOWER": "cheerful"}], "id": "character-bias"}
694
+ {"label": "adjectives", "pattern": [{"LOWER": "clever"}], "id": "character-bias"}
695
+ {"label": "adjectives", "pattern": [{"LOWER": "courtly"}], "id": "character-bias"}
696
+ {"label": "adjectives", "pattern": [{"LOWER": "creative"}], "id": "character-bias"}
697
+ {"label": "adjectives", "pattern": [{"LOWER": "cruel"}], "id": "character-bias"}
698
+ {"label": "adjectives", "pattern": [{"LOWER": "curious"}], "id": "character-bias"}
699
+ {"label": "adjectives", "pattern": [{"LOWER": "daring"}], "id": "character-bias"}
700
+ {"label": "adjectives", "pattern": [{"LOWER": "devout"}], "id": "character-bias"}
701
+ {"label": "adjectives", "pattern": [{"LOWER": "eager"}], "id": "character-bias"}
702
+ {"label": "adjectives", "pattern": [{"LOWER": "elegant"}], "id": "character-bias"}
703
+ {"label": "adjectives", "pattern": [{"LOWER": "energetic"}], "id": "character-bias"}
704
+ {"label": "adjectives", "pattern": [{"LOWER": "excited"}], "id": "character-bias"}
705
+ {"label": "adjectives", "pattern": [{"LOWER": "ferocious"}], "id": "character-bias"}
706
+ {"label": "adjectives", "pattern": [{"LOWER": "forgiving"}], "id": "character-bias"}
707
+ {"label": "adjectives", "pattern": [{"LOWER": "free"}], "id": "character-bias"}
708
+ {"label": "adjectives", "pattern": [{"LOWER": "friendly"}], "id": "character-bias"}
709
+ {"label": "adjectives", "pattern": [{"LOWER": "funny"}], "id": "character-bias"}
710
+ {"label": "adjectives", "pattern": [{"LOWER": "generous"}], "id": "character-bias"}
711
+ {"label": "adjectives", "pattern": [{"LOWER": "genteel"}], "id": "character-bias"}
712
+ {"label": "adjectives", "pattern": [{"LOWER": "gentle"}], "id": "character-bias"}
713
+ {"label": "adjectives", "pattern": [{"LOWER": "graceful"}], "id": "character-bias"}
714
+ {"label": "adjectives", "pattern": [{"LOWER": "grim"}], "id": "character-bias"}
715
+ {"label": "adjectives", "pattern": [{"LOWER": "grouchy"}], "id": "character-bias"}
716
+ {"label": "adjectives", "pattern": [{"LOWER": "happy"}], "id": "character-bias"}
717
+ {"label": "adjectives", "pattern": [{"LOWER": "heartless"}], "id": "character-bias"}
718
+ {"label": "adjectives", "pattern": [{"LOWER": "helpful"}], "id": "character-bias"}
719
+ {"label": "adjectives", "pattern": [{"LOWER": "honest"}], "id": "character-bias"}
720
+ {"label": "adjectives", "pattern": [{"LOWER": "humane"}], "id": "character-bias"}
721
+ {"label": "adjectives", "pattern": [{"LOWER": "humble"}], "id": "character-bias"}
722
+ {"label": "adjectives", "pattern": [{"LOWER": "impulsive"}], "id": "character-bias"}
723
+ {"label": "adjectives", "pattern": [{"LOWER": "independent"}], "id": "character-bias"}
724
+ {"label": "adjectives", "pattern": [{"LOWER": "indulgent"}], "id": "character-bias"}
725
+ {"label": "adjectives", "pattern": [{"LOWER": "intense"}], "id": "character-bias"}
726
+ {"label": "adjectives", "pattern": [{"LOWER": "inventive"}], "id": "character-bias"}
727
+ {"label": "adjectives", "pattern": [{"LOWER": "kind"}], "id": "character-bias"}
728
+ {"label": "adjectives", "pattern": [{"LOWER": "lazy"}], "id": "character-bias"}
729
+ {"label": "adjectives", "pattern": [{"LOWER": "lenient"}], "id": "character-bias"}
730
+ {"label": "adjectives", "pattern": [{"LOWER": "loyal"}], "id": "character-bias"}
731
+ {"label": "adjectives", "pattern": [{"LOWER": "meek"}], "id": "character-bias"}
732
+ {"label": "adjectives", "pattern": [{"LOWER": "merciless"}], "id": "character-bias"}
733
+ {"label": "adjectives", "pattern": [{"LOWER": "merry"}], "id": "character-bias"}
734
+ {"label": "adjectives", "pattern": [{"LOWER": "messy"}], "id": "character-bias"}
735
+ {"label": "adjectives", "pattern": [{"LOWER": "mild"}], "id": "character-bias"}
736
+ {"label": "adjectives", "pattern": [{"LOWER": "neat"}], "id": "character-bias"}
737
+ {"label": "adjectives", "pattern": [{"LOWER": "nervous"}], "id": "character-bias"}
738
+ {"label": "adjectives", "pattern": [{"LOWER": "obliging"}], "id": "character-bias"}
739
+ {"label": "adjectives", "pattern": [{"LOWER": "obnoxious"}], "id": "character-bias"}
740
+ {"label": "adjectives", "pattern": [{"LOWER": "odious"}], "id": "character-bias"}
741
+ {"label": "adjectives", "pattern": [{"LOWER": "patient"}], "id": "character-bias"}
742
+ {"label": "adjectives", "pattern": [{"LOWER": "plain"}], "id": "character-bias"}
743
+ {"label": "adjectives", "pattern": [{"LOWER": "pleasant"}], "id": "character-bias"}
744
+ {"label": "adjectives", "pattern": [{"LOWER": "polite"}], "id": "character-bias"}
745
+ {"label": "adjectives", "pattern": [{"LOWER": "proper"}], "id": "character-bias"}
746
+ {"label": "adjectives", "pattern": [{"LOWER": "proud"}], "id": "character-bias"}
747
+ {"label": "adjectives", "pattern": [{"LOWER": "quick"}], "id": "character-bias"}
748
+ {"label": "adjectives", "pattern": [{"LOWER": "quiet"}], "id": "character-bias"}
749
+ {"label": "adjectives", "pattern": [{"LOWER": "refined"}], "id": "character-bias"}
750
+ {"label": "adjectives", "pattern": [{"LOWER": "relaxed"}], "id": "character-bias"}
751
+ {"label": "adjectives", "pattern": [{"LOWER": "religious"}], "id": "character-bias"}
752
+ {"label": "adjectives", "pattern": [{"LOWER": "respectful"}], "id": "character-bias"}
753
+ {"label": "adjectives", "pattern": [{"LOWER": "rude"}], "id": "character-bias"}
754
+ {"label": "adjectives", "pattern": [{"LOWER": "savage"}], "id": "character-bias"}
755
+ {"label": "adjectives", "pattern": [{"LOWER": "selfish"}], "id": "character-bias"}
756
+ {"label": "adjectives", "pattern": [{"LOWER": "sensitive"}], "id": "character-bias"}
757
+ {"label": "adjectives", "pattern": [{"LOWER": "serious"}], "id": "character-bias"}
758
+ {"label": "adjectives", "pattern": [{"LOWER": "shrewd"}], "id": "character-bias"}
759
+ {"label": "adjectives", "pattern": [{"LOWER": "silly"}], "id": "character-bias"}
760
+ {"label": "adjectives", "pattern": [{"LOWER": "simple"}], "id": "character-bias"}
761
+ {"label": "adjectives", "pattern": [{"LOWER": "smart"}], "id": "character-bias"}
762
+ {"label": "adjectives", "pattern": [{"LOWER": "soft"}], "id": "character-bias"}
763
+ {"label": "adjectives", "pattern": [{"LOWER": "sophisticated"}], "id": "character-bias"}
764
+ {"label": "adjectives", "pattern": [{"LOWER": "stern"}], "id": "character-bias"}
765
+ {"label": "adjectives", "pattern": [{"LOWER": "strong"}], "id": "character-bias"}
766
+ {"label": "adjectives", "pattern": [{"LOWER": "stubborn"}], "id": "character-bias"}
767
+ {"label": "adjectives", "pattern": [{"LOWER": "tender"}], "id": "character-bias"}
768
+ {"label": "adjectives", "pattern": [{"LOWER": "tense"}], "id": "character-bias"}
769
+ {"label": "adjectives", "pattern": [{"LOWER": "timid"}], "id": "character-bias"}
770
+ {"label": "adjectives", "pattern": [{"LOWER": "tough"}], "id": "character-bias"}
771
+ {"label": "adjectives", "pattern": [{"LOWER": "trusting"}], "id": "character-bias"}
772
+ {"label": "adjectives", "pattern": [{"LOWER": "urbane"}], "id": "character-bias"}
773
+ {"label": "adjectives", "pattern": [{"LOWER": "vain"}], "id": "character-bias"}
774
+ {"label": "adjectives", "pattern": [{"LOWER": "vicious"}], "id": "character-bias"}
775
+ {"label": "adjectives", "pattern": [{"LOWER": "violent"}], "id": "character-bias"}
776
+ {"label": "adjectives", "pattern": [{"LOWER": "warm"}], "id": "character-bias"}
777
+ {"label": "adjectives", "pattern": [{"LOWER": "wise"}], "id": "character-bias"}
778
+ {"label": "adjectives", "pattern": [{"LOWER": "witty"}], "id": "character-bias"}
779
+ {"label": "adjectives", "pattern": [{"LOWER": "acidic"}], "id": "food-bias"}
780
+ {"label": "adjectives", "pattern": [{"LOWER": "baked"}], "id": "food-bias"}
781
+ {"label": "adjectives", "pattern": [{"LOWER": "bitter"}], "id": "food-bias"}
782
+ {"label": "adjectives", "pattern": [{"LOWER": "bland"}], "id": "food-bias"}
783
+ {"label": "adjectives", "pattern": [{"LOWER": "blended"}], "id": "food-bias"}
784
+ {"label": "adjectives", "pattern": [{"LOWER": "briny"}], "id": "food-bias"}
785
+ {"label": "adjectives", "pattern": [{"LOWER": "buttery"}], "id": "food-bias"}
786
+ {"label": "adjectives", "pattern": [{"LOWER": "candied"}], "id": "food-bias"}
787
+ {"label": "adjectives", "pattern": [{"LOWER": "cheesy"}], "id": "food-bias"}
788
+ {"label": "adjectives", "pattern": [{"LOWER": "chewy"}], "id": "food-bias"}
789
+ {"label": "adjectives", "pattern": [{"LOWER": "chocolaty"}], "id": "food-bias"}
790
+ {"label": "adjectives", "pattern": [{"LOWER": "cold"}], "id": "food-bias"}
791
+ {"label": "adjectives", "pattern": [{"LOWER": "creamy"}], "id": "food-bias"}
792
+ {"label": "adjectives", "pattern": [{"LOWER": "crispy"}], "id": "food-bias"}
793
+ {"label": "adjectives", "pattern": [{"LOWER": "crunchy"}], "id": "food-bias"}
794
+ {"label": "adjectives", "pattern": [{"LOWER": "delicious"}], "id": "food-bias"}
795
+ {"label": "adjectives", "pattern": [{"LOWER": "doughy"}], "id": "food-bias"}
796
+ {"label": "adjectives", "pattern": [{"LOWER": "dry"}], "id": "food-bias"}
797
+ {"label": "adjectives", "pattern": [{"LOWER": "flavorful"}], "id": "food-bias"}
798
+ {"label": "adjectives", "pattern": [{"LOWER": "frozen"}], "id": "food-bias"}
799
+ {"label": "adjectives", "pattern": [{"LOWER": "golden"}], "id": "food-bias"}
800
+ {"label": "adjectives", "pattern": [{"LOWER": "gourmet"}], "id": "food-bias"}
801
+ {"label": "adjectives", "pattern": [{"LOWER": "greasy"}], "id": "food-bias"}
802
+ {"label": "adjectives", "pattern": [{"LOWER": "grilled"}], "id": "food-bias"}
803
+ {"label": "adjectives", "pattern": [{"LOWER": "icy"}], "id": "food-bias"}
804
+ {"label": "adjectives", "pattern": [{"LOWER": "intense"}], "id": "food-bias"}
805
+ {"label": "adjectives", "pattern": [{"LOWER": "jellied"}], "id": "food-bias"}
806
+ {"label": "adjectives", "pattern": [{"LOWER": "juicy"}], "id": "food-bias"}
807
+ {"label": "adjectives", "pattern": [{"LOWER": "jumbo"}], "id": "food-bias"}
808
+ {"label": "adjectives", "pattern": [{"LOWER": "lean"}], "id": "food-bias"}
809
+ {"label": "adjectives", "pattern": [{"LOWER": "marinated"}], "id": "food-bias"}
810
+ {"label": "adjectives", "pattern": [{"LOWER": "mashed"}], "id": "food-bias"}
811
+ {"label": "adjectives", "pattern": [{"LOWER": "mild"}], "id": "food-bias"}
812
+ {"label": "adjectives", "pattern": [{"LOWER": "minty"}], "id": "food-bias"}
813
+ {"label": "adjectives", "pattern": [{"LOWER": "nutty"}], "id": "food-bias"}
814
+ {"label": "adjectives", "pattern": [{"LOWER": "organic"}], "id": "food-bias"}
815
+ {"label": "adjectives", "pattern": [{"LOWER": "piquant"}], "id": "food-bias"}
816
+ {"label": "adjectives", "pattern": [{"LOWER": "plain"}], "id": "food-bias"}
817
+ {"label": "adjectives", "pattern": [{"LOWER": "poached"}], "id": "food-bias"}
818
+ {"label": "adjectives", "pattern": [{"LOWER": "pounded"}], "id": "food-bias"}
819
+ {"label": "adjectives", "pattern": [{"LOWER": "prepared"}], "id": "food-bias"}
820
+ {"label": "adjectives", "pattern": [{"LOWER": "pureed"}], "id": "food-bias"}
821
+ {"label": "adjectives", "pattern": [{"LOWER": "rancid"}], "id": "food-bias"}
822
+ {"label": "adjectives", "pattern": [{"LOWER": "rank"}], "id": "food-bias"}
823
+ {"label": "adjectives", "pattern": [{"LOWER": "rich"}], "id": "food-bias"}
824
+ {"label": "adjectives", "pattern": [{"LOWER": "ripe"}], "id": "food-bias"}
825
+ {"label": "adjectives", "pattern": [{"LOWER": "rubbery"}], "id": "food-bias"}
826
+ {"label": "adjectives", "pattern": [{"LOWER": "salty"}], "id": "food-bias"}
827
+ {"label": "adjectives", "pattern": [{"LOWER": "saucy"}], "id": "food-bias"}
828
+ {"label": "adjectives", "pattern": [{"LOWER": "savory"}], "id": "food-bias"}
829
+ {"label": "adjectives", "pattern": [{"LOWER": "seasoned"}], "id": "food-bias"}
830
+ {"label": "adjectives", "pattern": [{"LOWER": "sharp"}], "id": "food-bias"}
831
+ {"label": "adjectives", "pattern": [{"LOWER": "simmered"}], "id": "food-bias"}
832
+ {"label": "adjectives", "pattern": [{"LOWER": "smoked"}], "id": "food-bias"}
833
+ {"label": "adjectives", "pattern": [{"LOWER": "smoky"}], "id": "food-bias"}
834
+ {"label": "adjectives", "pattern": [{"LOWER": "sour"}], "id": "food-bias"}
835
+ {"label": "adjectives", "pattern": [{"LOWER": "spicy"}], "id": "food-bias"}
836
+ {"label": "adjectives", "pattern": [{"LOWER": "steamed"}], "id": "food-bias"}
837
+ {"label": "adjectives", "pattern": [{"LOWER": "sticky"}], "id": "food-bias"}
838
+ {"label": "adjectives", "pattern": [{"LOWER": "stringy"}], "id": "food-bias"}
839
+ {"label": "adjectives", "pattern": [{"LOWER": "strong"}], "id": "food-bias"}
840
+ {"label": "adjectives", "pattern": [{"LOWER": "succulent"}], "id": "food-bias"}
841
+ {"label": "adjectives", "pattern": [{"LOWER": "sugary"}], "id": "food-bias"}
842
+ {"label": "adjectives", "pattern": [{"LOWER": "sweet"}], "id": "food-bias"}
843
+ {"label": "adjectives", "pattern": [{"LOWER": "syrupy"}], "id": "food-bias"}
844
+ {"label": "adjectives", "pattern": [{"LOWER": "tangy"}], "id": "food-bias"}
845
+ {"label": "adjectives", "pattern": [{"LOWER": "tart"}], "id": "food-bias"}
846
+ {"label": "adjectives", "pattern": [{"LOWER": "tender"}], "id": "food-bias"}
847
+ {"label": "adjectives", "pattern": [{"LOWER": "toasted"}], "id": "food-bias"}
848
+ {"label": "adjectives", "pattern": [{"LOWER": "topped"}], "id": "food-bias"}
849
+ {"label": "adjectives", "pattern": [{"LOWER": "tossed"}], "id": "food-bias"}
850
+ {"label": "adjectives", "pattern": [{"LOWER": "yummy"}], "id": "food-bias"}
851
+ {"label": "adjectives", "pattern": [{"LOWER": "zingy"}], "id": "food-bias"}
852
+ {"label": "adjectives", "pattern": [{"LOWER": "braised"}], "id": "food-bias"}
853
+ {"label": "adjectives", "pattern": [{"LOWER": "fried"}], "id": "food-bias"}
854
+ {"label": "adjectives", "pattern": [{"LOWER": "fermented"}], "id": "food-bias"}
855
+ {"label": "adjectives", "pattern": [{"LOWER": "milky"}], "id": "food-bias"}
856
+ {"label": "adjectives", "pattern": [{"LOWER": "damaged"}], "id": "food-bias"}
857
+ {"label": "adjectives", "pattern": [{"LOWER": "spicy"}], "id": "food-bias"}
858
+ {"label": "adjectives", "pattern": [{"LOWER": "edible"}], "id": "food-bias"}
859
+ {"label": "adjectives", "pattern": [{"LOWER": "nutritious"}], "id": "food-bias"}
860
+ {"label": "adjectives", "pattern": [{"LOWER": "citric"}], "id": "food-bias"}
861
+ {"label": "adjectives", "pattern": [{"LOWER": "cloying"}], "id": "food-bias"}
862
+ {"label": "adjectives", "pattern": [{"LOWER": "caramelized"}], "id": "food-bias"}
NLselector.py ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #Import the libraries we know we'll need for the Generator.
2
+ import pandas as pd, spacy, nltk, numpy as np, re
3
+ from spacy.matcher import Matcher
4
+ #!python -m spacy download en_core_web_md #Not sure if we need this so I'm going to keep it just in case
5
+ nlp = spacy.load("en_core_web_lg")
6
+ import altair as alt
7
+ import streamlit as st
8
+ from annotated_text import annotated_text as ant
9
+
10
+ #Import the libraries to support the model and predictions.
11
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
12
+ import lime
13
+ import torch
14
+ import torch.nn.functional as F
15
+ from lime.lime_text import LimeTextExplainer
16
+
17
# Class labels handed to the LIME text explainer.
class_names = ['negative', 'positive']
explainer = LimeTextExplainer(class_names=class_names)

# The tokenizer and the classifier are both loaded from the same checkpoint.
_CHECKPOINT = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(_CHECKPOINT)

# Pipeline used by eval_pred; it reports a score for every label.
pipe = TextClassificationPipeline(model=model, tokenizer=tokenizer, return_all_scores=True)
22
+
23
def predictor(texts):
    '''Return the model's class probabilities for a batch of texts.

    This is the classifier function passed to LIME's explain_instance.

    Args:
        texts: The text(s) to classify, in any form the tokenizer accepts
            (LIME passes a list of perturbed strings).

    Returns:
        A NumPy array with one row per text and one column per class,
        each row being the softmax of that text's logits.
    '''
    # truncation=True keeps inputs longer than the model's maximum length
    # from raising inside the forward pass.
    encoded = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)
    # Inference only: no_grad avoids building an autograd graph for every batch.
    with torch.no_grad():
        logits = model(**encoded).logits
    return F.softmax(logits, dim=1).detach().numpy()
27
+
28
@st.experimental_singleton
def critical_words(document, options=False):
    '''Collect the words of a document that are candidates for replacement.

    Candidates come from the document's noun chunks (entities, compound
    nouns, nouns and adjectives) plus adjectival complements and personal
    pronouns found anywhere in the document.

    Args:
        document: Raw text or an already parsed spaCy Doc. Anything that is
            not a Doc is parsed with the module-level `nlp` pipeline.
        options: When truthy, LIME is run on the document text and a word is
            only kept if LIME lists it among its features; the LIME weights
            are returned as well.

    Returns:
        `pos_options` (a list of strings) when `options` is falsy, otherwise
        the tuple `(pos_options, lime_results)` where `lime_results` is a
        DataFrame with the columns "Word" and "Weight".
    '''
    if not isinstance(document, spacy.tokens.doc.Doc):
        document = nlp(document)
    use_lime = bool(options)
    pos_options = []
    lime_options = []
    lime_results = None

    #Identify what the model cares about.
    if use_lime:
        exp = explainer.explain_instance(document.text, predictor, num_features=15, num_samples=2000)
        lime_pairs = exp.as_list()
        lime_options = [pair[0] for pair in lime_pairs]
        lime_results = pd.DataFrame(lime_pairs, columns=["Word","Weight"])

    def is_relevant(words):
        # Without LIME every candidate is accepted; with LIME every word
        # must be one of the features LIME reported.
        return (not use_lime) or all(word in lime_options for word in words)

    #Identify what we care about "parts of speech"
    for chunk in document.noun_chunks:
        #The use of chunk[-1] is due to testing that it appears to always match the root
        root = chunk[-1]
        if root.ent_type_:
            if (len(chunk) > 1) and (chunk[-2].dep_ == "compound"):
                #creates the compound element of the noun
                compound = [x.text for x in chunk if x.dep_ == "compound"]
                print(f"This is the contents of {compound} and it is {all(elem in lime_options for elem in compound)} that all elements are present in {lime_options}.") #for QA
                # Check the compound itself. The previous code checked an
                # always-empty list, so this test could never fail.
                if is_relevant(compound):
                    #creates a span for the entirety of the compound noun and adds it to the list.
                    span = -1 * (1 + len(compound))
                    pos_options.append(chunk[span:].text)
                    # NOTE(review): the original computed the chunk's ADJ
                    # tokens here and discarded the result; if adjectives of
                    # compound entities should become options too, add them
                    # to pos_options explicitly.
                else:
                    print(f"The elmenents in {compound} could not be added to the final list because they are not all relevant to the model.")
            else:
                cur_values = [token.text for token in chunk if (token.ent_type_) or (token.pos_ == "ADJ")]
                if is_relevant(cur_values):
                    pos_options.extend(cur_values)
                    print(f"From {chunk.text}, {cur_values} added to pos_options due to entity recognition.") #for QA
        elif len(chunk) >= 1:
            cur_values = [token.text for token in chunk if token.pos_ in ["NOUN","ADJ"]]
            if is_relevant(cur_values):
                pos_options.extend(cur_values)
                print(f"From {chunk.text}, {cur_values} added to pos_options due to wildcard.") #for QA
        else:
            print(f"No options added for \'{chunk.text}\' ")

    # Pick up adjectival complements and personal pronouns that the noun
    # chunks did not already contribute.
    for token in document:
        if token.text in pos_options:
            continue
        if use_lime and token.text not in lime_options:
            continue
        if (token.pos_ == "ADJ") and (token.dep_ in ["acomp","conj"]):
            pos_options.append(token.text)
        # morph.get returns a list of values, so test membership rather than
        # equality with a string (which was always False).
        elif (token.pos_ == "PRON") and ("Prs" in token.morph.get("PronType")):
            pos_options.append(token.text)

    if use_lime:
        return pos_options, lime_results
    return pos_options
90
+
91
+ # Return the Viz of elements critical to LIME.
92
def lime_viz(df):
    '''Build the Altair chart showing the LIME weight of each word.

    Args:
        df: A DataFrame with the columns "Word" and "Weight", or any data
            pandas can turn into a DataFrame with those two columns.

    Returns:
        A layered Altair chart: horizontal bars for the weights with the
        words drawn as text labels on top, without grid lines or view border.
    '''
    if not isinstance(df, pd.DataFrame):
        df = pd.DataFrame(df, columns=["Word","Weight"])
    # The mouse-over selection that used to be created here was never
    # attached to the chart, so it has been dropped.
    bars = alt.Chart(df).encode(
        alt.X('Weight:Q', scale=alt.Scale(domain=(-1, 1))),
        alt.Y('Word:N', sort='x', axis=None),
        # Threshold scale at 0: weights below and above zero get different colors.
        color=alt.Color("Weight", scale=alt.Scale(scheme='blueorange', domain=[0], type="threshold", range='diverging'), legend=None),
        tooltip=("Word","Weight"),
    ).mark_bar().properties(title ="Importance of individual words")

    # Same encodings as the bars, rendered as the word itself.
    labels = bars.mark_text(
        fill="black",
        align='right',
        baseline='middle',
    ).encode(
        text='Word:N'
    )
    limeplot = alt.LayerChart(layer=[bars, labels], width = 300).configure_axis(grid=False).configure_view(strokeWidth=0)
    return limeplot
112
+
113
+ # Evaluate Predictions using the model and pipe.
114
def eval_pred(text, return_all = False):
    '''Score a text with the pipeline and fold the result into one signed number.

    The first entry of the pipeline output is treated as the negative class
    and the second as the positive class. The returned prediction is the
    positive score when it is the larger one, the negated negative score when
    that one is larger, and 0 (with an empty sentiment) when they are equal.

    Args:
        text: The text to classify.
        return_all: When truthy, also return the winning label.

    Returns:
        `prediction`, or `(prediction, sentiment)` when `return_all` is truthy.
    '''
    scores = pipe(text)[0]
    neg_entry, pos_entry = scores[0], scores[1]
    neg_score = -1 * neg_entry['score']
    pos_score = pos_entry['score']

    if pos_score > abs(neg_score):
        prediction, sentiment = pos_score, pos_entry['label']
    elif abs(neg_score) > pos_score:
        prediction, sentiment = neg_score, neg_entry['label']
    else:
        prediction, sentiment = 0, ''

    return (prediction, sentiment) if return_all else prediction
134
+
135
def construct_nlexp(text,sentiment,probability):
    '''Render the natural-language explanation of a prediction.

    Args:
        text: Unused; kept so existing callers keep working.
        sentiment: The predicted label, "NEGATIVE" or "POSITIVE".
        probability: The (possibly signed) score of the prediction; its
            absolute value is shown as a percentage rounded to 2 decimals.

    Returns:
        Whatever `annotated_text` returns for the rendered sentence, or None
        (nothing is rendered) when `sentiment` is not one of the two known
        labels. Previously that case raised UnboundLocalError.
    '''
    prob = str(np.round(100 * abs(probability),2))
    # label -> (annotation for the label, annotation for the number, color)
    styles = {
        "NEGATIVE": ("-", "neg", "#FFA44F"),
        "POSITIVE": ("+", "pos", "#50A9FF"),
    }
    style = styles.get(sentiment)
    if style is None:
        return None
    sign, tag, color = style
    color_sent = ant('The model predicts the sentiment of the sentence you provided is ', (sentiment, sign, color), ' with a probability of ', (prob, tag, color),"%.")
    return color_sent
142
+
143
def get_min_max(df, seed):
    '''Pick the alternatives with the highest and the lowest spaCy similarity.

    Rows with a similarity of .9999 or more are ignored when looking for the
    maximum and rows with a similarity of .0001 or less are ignored when
    looking for the minimum. As similarity is based on vectorization of words
    and documents this may not be the best way to identify bias.

    Args:
        df: DataFrame with the columns 'similarity' and 'text'.
        seed: Unused.

    Returns:
        (text2, text3) where
        text2 = Most Similar
        text3 = Least Similar
    '''
    scores = df['similarity']
    highest = scores[scores < .9999].max()
    lowest = scores[scores > .0001].min()
    # When several rows share the value, the first one in df wins.
    text2 = df.loc[scores == highest, 'text'].iloc[0]
    text3 = df.loc[scores == lowest, 'text'].iloc[0]
    return text2, text3
153
+
154
+ # Inspired by https://stackoverflow.com/questions/17758023/return-rows-in-a-dataframe-closest-to-a-user-defined-number/17758115#17758115
155
def abs_dif(df,seed):
    '''Pick the alternatives whose prediction is closest to and farthest from the seed's.

    Args:
        df: DataFrame with the columns 'Words', 'pred' and 'text'.
        seed: The value in 'Words' that identifies the seed row.

    Returns:
        (text2, text3) where
        text2 = Nearest Prediction
        text3 = Farthest Prediction
    '''
    target = df.loc[df['Words'] == seed, 'pred'].iloc[0]
    others = df[df['Words'] != seed].reset_index()
    # Row positions of the alternatives ordered by distance from the seed's prediction.
    order = (others.pred - target).abs().argsort()
    text2 = others.text.iloc[order.iloc[0]]
    text3 = others.text.iloc[order.iloc[-1]]
    return text2, text3
167
+
168
+ #@st.experimental_singleton #I've enabled this to prevent it from triggering every time the code runs... which could get very messy
169
def sampled_alts(df, seed, fixed=False):
    '''Pick two alternatives at random instead of by similarity or prediction distance.

    This is an alternate way of choosing which counterfactuals are shown for
    MultiNLC, MultiNLC + Lime, and VizNLC.

    Args:
        df: DataFrame with the columns 'Words' and 'text'.
        seed: The value in 'Words' to exclude from the sample.
        fixed: When truthy the sample uses random_state 2052, so the same
            input always yields the same pair.

    Returns:
        (text2, text3), the texts of the two sampled rows. Both are random.
    '''
    candidates = df[df['Words'] != seed]
    random_state = 2052 if fixed else None
    picks = candidates.sample(n=2, random_state=random_state)
    return picks.text.iloc[0], picks.text.iloc[1]
181
+
182
def gen_cf_country(df,_document,selection):
    '''Generate and score one counterfactual sentence per country in `df`.

    The columns 'text', 'pred', 'seed' and 'similarity' are added to `df`
    itself, which is also returned.

    Args:
        df: DataFrame with a 'Words' column of replacement words.
        _document: Parsed document; its `.text` is the sentence to rewrite.
        selection: The word or phrase in the sentence to replace.

    Returns:
        `df` with the added columns:
            text: the sentence with `selection` replaced by the row's word,
            pred: the result of `eval_pred` on that sentence,
            seed: 'seed' for the row equal to `selection`, else 'alternative',
            similarity: spaCy similarity between `selection` and the word.
    '''
    # Match `selection` literally as a whole word/phrase. Escaping keeps
    # characters such as "." or "(" from being read as regex syntax, and the
    # lookarounds behave like \b while also working when `selection` starts
    # or ends with punctuation.
    pattern = re.compile(r'(?<!\w)' + re.escape(selection) + r'(?!\w)')
    sentence = _document.text
    # Parse the seed once instead of once per row.
    seed_doc = nlp(selection)

    # A callable replacement inserts the word verbatim (no template escapes).
    df['text'] = df.Words.apply(lambda word: pattern.sub(lambda _match: word, sentence))
    df['pred'] = df.text.apply(eval_pred)
    df['seed'] = df.Words.apply(lambda word: 'seed' if word == selection else 'alternative')
    df['similarity'] = df.Words.apply(lambda word: seed_doc.similarity(nlp(word)))
    return df
188
+
189
+
190
def gen_cf_profession(df,_document,selection):
    '''Generate and score counterfactual sentences for professions in the same major group.

    Only the rows whose 'Major' value equals that of `selection` are used.

    Args:
        df: DataFrame with the columns 'Words' and 'Major'. It is not modified.
        _document: Parsed document; its `.text` is the sentence to rewrite.
        selection: The profession in the sentence to replace; it must occur
            in `df['Words']`.

    Returns:
        A new DataFrame holding the matching rows plus the columns:
            text: the sentence with `selection` replaced by the row's word,
            pred: the result of `eval_pred` on that sentence,
            seed: 'seed' for the row equal to `selection`, else 'alternative',
            similarity: spaCy similarity between `selection` and the word.
    '''
    category = df.loc[df['Words'] == selection, 'Major'].iloc[0]
    # Copy the filtered rows so the new columns are written to an independent
    # frame rather than to a slice of the caller's DataFrame.
    df = df[df.Major == category].copy()

    # Match `selection` literally as a whole word/phrase. Escaping keeps
    # regex metacharacters in the profession name from being interpreted, and
    # the lookarounds behave like \b while also working when `selection`
    # starts or ends with punctuation.
    pattern = re.compile(r'(?<!\w)' + re.escape(selection) + r'(?!\w)')
    sentence = _document.text
    # Parse the seed once instead of once per row.
    seed_doc = nlp(selection)

    # A callable replacement inserts the word verbatim (no template escapes).
    df['text'] = df.Words.apply(lambda word: pattern.sub(lambda _match: word, sentence))
    df['pred'] = df.text.apply(eval_pred)
    df['seed'] = df.Words.apply(lambda word: 'seed' if word == selection else 'alternative')
    df['similarity'] = df.Words.apply(lambda word: seed_doc.similarity(nlp(word)))
    return df
Pipfile ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [[source]]
2
+ url = "https://pypi.org/simple"
3
+ verify_ssl = true
4
+ name = "pypi"
5
+
6
+ [packages]
7
+ streamlit = "*"
8
+ pandas = "*"
9
+ numpy = "*"
10
+ altair = "*"
11
+ sklearn = "*"
12
+ streamlit-vega-lite = "*"
13
+ plotly = "*"
14
+ gensim = "*"
15
+ nltk = "*"
16
+ spacy = "*"
17
+ lime = "*"
18
+ xlrd = "*"
19
+ colorama = "*"
20
+ st-annotated-text = "*"
21
+ shap = "*"
22
+ transformers = "*"
23
+ torch = "*"
24
+ black = "==19.3b0"
25
+ pylint = "*"
26
+ watchdog = "*"
27
+ jupyterlab = "*"
28
+ jupyter = "*"
29
+
30
+
31
+ [requires]
32
+ python_version = "3.8"
33
+
34
+ [scripts]
35
+ format = "black ."
36
+ format_check = "black --check ."
37
+ lint = "pylint app.py"
38
+ app= "streamlit run app.py"
39
+ clear_cache = "streamlit cache clear"
40
+ notebook = "jupyter notebook"
Pipfile.lock ADDED
The diff for this file is too large to render. See raw diff
README OG.md ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # NLC-Gen
2
+ ### A Natural Language Counterfactual Generator for Exploring Bias in Sentiment Analysis Algorithms
3
+
4
+ ##### Overview
5
+ This project is an extension of [Interactive Model Cards](https://github.com/amcrisan/interactive-model-cards). It focuses on providing a person more ways to explore the bias of a model through the generation of alternatives (technically [counterfactuals](https://plato.stanford.edu/entries/counterfactuals/#WhatCoun)). We believe that by using alternatives, people can better understand the limitations of a model and develop productive skepticism around its usage and trustworthiness.
6
+
7
+ ##### Set up
8
+
9
+ Download the files from GitHub, then run the commands below in a terminal:
10
+ ```sh
11
+ cd NLC-Gen
12
+ pipenv install
13
+ pipenv shell
14
+ python -m spacy download en_core_web_lg
15
+ streamlit run NLC-app.py
16
+ ```
17
+
18
+ ##### Known Limitations
19
+ * Words not in the spaCy vocab for `en_core_web_lg` won't have vectors and so won't have the ability to create similarity scores.
20
+ * WordNet has many limitations due to its age and lack of funding for ongoing maintenance. It provides access to a large variety of the English language, but certain words simply do not exist in it.
21
+ * There are currently only 2 lists (Countries and Professions). We would like to find community curated lists for: Race, Sexual Orientation and Gender Identity (SOGI), Religion, age, and protected status.
22
+
23
+
24
+ ##### Key Dependencies and Packages
25
+
26
+ 1. [Hugging Face Transformers](https://huggingface.co/) - the model we've designed this iteration for is hosted on hugging face. It is: [distilbert-base-uncased-finetuned-sst-2-english](https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
27
+ 2. [Streamlit](https://streamlit.io) - This is the library we're using to build the prototype app because it is easy to stand up and quick to fix.
28
+ 3. [spaCy](https://spacy.io) - This is the main NLP Library we're using and it runs most of the text manipulation we're doing as part of the project.
29
+ 4. [NLTK + WordNet](https://www.nltk.org/howto/wordnet.html) - This is the initial lexical database we're using because it is accessible directly through Python and it is free. We will be considering a move to [ConceptNet](https://conceptnet.io/) for future iterations based on better lateral movement across edges.
30
+ 5. [Lime](https://github.com/marcotcr/lime) - We chose Lime over Shap because Lime has more of the functionality we need. Shap appears to provide greater performance but is not as easily suited to our original designs.
31
+ 6. [Altair](https://altair-viz.github.io/user_guide/encoding.html) - We're using Altair because it's well integrated into Streamlit.
32
+
33
+
34
+
VizNLC-duct-tape-pipeline.ipynb ADDED
@@ -0,0 +1,934 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "8ea54fcd-ef4a-42cb-ae26-cbdc6f6ffc64",
6
+ "metadata": {
7
+ "tags": []
8
+ },
9
+ "source": [
10
+ "# Duct Tape Pipeline\n",
11
+ "To explore how users may interact with interactive visualizations of counterfactuals for evolving the Interactive Model Card, we will need to first find a way to generate counterfactuals based on a given input. We want the user to be able to provide their input and direct the system to generate counterfactuals based on a part of speech that is significant to the model. The system should then provide a data frame of counterfactuals to be used in an interactive visualization. Below is an example wireframe of the experience based on previous research.\n",
12
+ "\n",
13
+ "![wireframe](Assets/VizNLC-Wireframe-example.png)\n",
14
+ "\n",
15
+ "## Goals of this notebook\n",
16
+ "* Test which libraries (Ex. [spaCy](https://spacy.io/) and [NLTK](https://www.nltk.org/)) will work\n",
17
+ "* Identify defaults to use\n",
18
+ "* Build a rudimentary script for generating counterfactuals from user input\n",
19
+ "* Ensure the counterfactuals are in a useable format for visualization"
20
+ ]
21
+ },
22
+ {
23
+ "cell_type": "markdown",
24
+ "id": "736e6375-dd6d-4188-b8b1-92bded2bcd02",
25
+ "metadata": {},
26
+ "source": [
27
+ "## Loading the libraries and models"
28
+ ]
29
+ },
30
+ {
31
+ "cell_type": "code",
32
+ "execution_count": 3,
33
+ "id": "7f581785-e642-4f74-9f67-06a63820eaf2",
34
+ "metadata": {},
35
+ "outputs": [],
36
+ "source": [
37
+ "#Import the libraries we know we'll need for the Generator.\n",
38
+ "import pandas as pd, spacy, nltk, numpy as np\n",
39
+ "from spacy import displacy\n",
40
+ "from spacy.matcher import Matcher\n",
41
+ "#!python -m spacy download en_core_web_sm\n",
42
+ "nlp = spacy.load(\"en_core_web_sm\")\n",
43
+ "lemmatizer = nlp.get_pipe(\"lemmatizer\")\n",
44
+ "\n",
45
+ "#Import the libraries to support the model, predictions, and LIME.\n",
46
+ "from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline\n",
47
+ "import lime\n",
48
+ "import torch\n",
49
+ "import torch.nn.functional as F\n",
50
+ "from lime.lime_text import LimeTextExplainer\n",
51
+ "\n",
52
+ "#Import the libraries for generating interactive visualizations.\n",
53
+ "import altair as alt"
54
+ ]
55
+ },
56
+ {
57
+ "cell_type": "code",
58
+ "execution_count": null,
59
+ "id": "cbe2b292-e33e-4915-8e61-bba5327fb643",
60
+ "metadata": {},
61
+ "outputs": [],
62
+ "source": [
63
+ "#Defining all necessary variables and instances.\n",
64
+ "tokenizer = AutoTokenizer.from_pretrained(\"distilbert-base-uncased-finetuned-sst-2-english\")\n",
65
+ "model = AutoModelForSequenceClassification.from_pretrained(\"distilbert-base-uncased-finetuned-sst-2-english\")\n",
66
+ "class_names = ['negative', 'positive']\n",
67
+ "explainer = LimeTextExplainer(class_names=class_names)"
68
+ ]
69
+ },
70
+ {
71
+ "cell_type": "code",
72
+ "execution_count": null,
73
+ "id": "197c3e26-0fdf-49c6-9135-57f1fd55d3e3",
74
+ "metadata": {},
75
+ "outputs": [],
76
+ "source": [
77
+ "#Defining a Predictor required for LIME to function.\n",
78
+ "def predictor(texts):\n",
79
+ " outputs = model(**tokenizer(texts, return_tensors=\"pt\", padding=True))\n",
80
+ " probas = F.softmax(outputs.logits, dim=1).detach().numpy()\n",
81
+ " return probas"
82
+ ]
83
+ },
84
+ {
85
+ "cell_type": "markdown",
86
+ "id": "e731dcbb-4fcf-41c6-9493-edef02fdb1b6",
87
+ "metadata": {},
88
+ "source": [
89
+ "## Exploring concepts to see what might work\n",
90
+ "To begin building the pipeline, I first determined whether I needed to build my own matcher or whether spaCy has something built in that would make this easier. Building our own matcher to account for each possible pattern would be exceptionally cumbersome given all of the variations we need to look out for. Instead, I found that the built-in `noun_chunks` attribute simplifies the text down to the parts of speech we care about most.\n",
91
+ "* I built a few helper functions from tutorials to explore the parts of speech within given sentences and the way `noun_chunks` works\n",
92
+ "* I explored using `displacy` as a means of visualizing sentences to call out what the pre-trained models already understand"
93
+ ]
94
+ },
95
+ {
96
+ "cell_type": "code",
97
+ "execution_count": null,
98
+ "id": "1f2eca3c-525c-4e29-8cc1-c87e89a3fadf",
99
+ "metadata": {},
100
+ "outputs": [],
101
+ "source": [
102
+ "#A quick test of Noun Chunks\n",
103
+ "text = \"The movie was filmed in New Zealand.\"\n",
104
+ "doc = nlp(text)\n",
105
+ "def n_chunk(doc):\n",
106
+ " for chunk in doc.noun_chunks:\n",
107
+ " print(f\"Text: {chunk.text:<12}| Root:{chunk.root.text:<12}| Root Dependency: {chunk.root.dep_:<12}| Root Head: {chunk.root.head.text:<12}\")\n",
108
+ "n_chunk(doc)"
109
+ ]
110
+ },
111
+ {
112
+ "cell_type": "code",
113
+ "execution_count": null,
114
+ "id": "98978c29-a39c-48e3-bdbb-b74388ded6bc",
115
+ "metadata": {},
116
+ "outputs": [],
117
+ "source": [
118
+ "#The user will need to enter text. For now, we're going to provide a series of sentences generated to have things we care about. For clarity \"upt\" means \"user-provided text\".\n",
119
+ "upt1 = \"I like movies starring black actors.\"\n",
120
+ "upt2 = \"I am a black trans-woman.\"\n",
121
+ "upt3 = \"Native Americans deserve to have their land back.\"\n",
122
+ "upt4 = \"This movie was filmed in Iraq.\"\n",
123
+ "\n",
124
+ "#Here I provide a larger text with mixed messages one sentence per line.\n",
125
+ "text1 = (\n",
126
+ "\"I like movies starring black actors.\"\n",
127
+ "\"I am a black trans-woman.\"\n",
128
+ "\"Native Americans deserve to have their land back.\"\n",
129
+ "\"This movie was filmed in Iraq.\"\n",
130
+ "\"The Chinese cat and the African bat walked into a Jamaican bar.\"\n",
131
+ "\"There once was a flexible pole that met an imovable object.\"\n",
132
+ "\"A Catholic nun, a Buddhist monk, a satanic cultist, and a Wiccan walk into your garage.\")\n",
133
+ "\n",
134
+ "doc1 = nlp(upt1)\n",
135
+ "doc2 = nlp(upt2)\n",
136
+ "doc3 = nlp(upt3)\n",
137
+ "doc4 = nlp(upt4)\n",
138
+ "doct = nlp(text1)"
139
+ ]
140
+ },
141
+ {
142
+ "cell_type": "code",
143
+ "execution_count": null,
144
+ "id": "38023eca-b224-412d-aa71-02bd694530e0",
145
+ "metadata": {},
146
+ "outputs": [],
147
+ "source": [
148
+ "#Using displacy to explore how the NLP model views sentences.\n",
149
+ "displacy.render(doc, style=\"ent\")"
150
+ ]
151
+ },
152
+ {
153
+ "cell_type": "code",
154
+ "execution_count": null,
155
+ "id": "c28edec8-dc30-4ef9-8c1e-131b0e1b1a45",
156
+ "metadata": {},
157
+ "outputs": [],
158
+ "source": [
159
+ "#Another visual for understanding how the model views sentences.\n",
160
+ "displacy.render(doc, style=\"dep\")"
161
+ ]
162
+ },
163
+ {
164
+ "cell_type": "code",
165
+ "execution_count": 4,
166
+ "id": "dd0d5f8e-ee80-48f7-be92-effa5f84c723",
167
+ "metadata": {},
168
+ "outputs": [],
169
+ "source": [
170
+ "#A simple function to print out the text, part of speech, dependency, entity type, and morphology of each token.\n",
171
+ "def text_pos(doc):\n",
172
+ " for token in doc:\n",
173
+ " # Get the token text, part-of-speech tag and dependency label\n",
174
+ " token_text = token.text\n",
175
+ " token_pos = token.pos_\n",
176
+ " token_dep = token.dep_\n",
177
+ " token_ent = token.ent_type_\n",
178
+ " token_morph = token.morph\n",
179
+ " # This is for formatting only\n",
180
+ " print(f\"Text: {token_text:<12}| Part of Speech: {token_pos:<10}| Dependency: {token_dep:<10}| Entity: {token_ent:<10} | Morph: {token_morph}\")"
181
+ ]
182
+ },
183
+ {
184
+ "cell_type": "code",
185
+ "execution_count": 6,
186
+ "id": "5dfee095-3852-4dba-a7dc-5519e8ec6eaa",
187
+ "metadata": {},
188
+ "outputs": [
189
+ {
190
+ "name": "stdout",
191
+ "output_type": "stream",
192
+ "text": [
193
+ "Text: Who | Part of Speech: PRON | Dependency: nsubj | Entity: | Morph: \n",
194
+ "Text: put | Part of Speech: VERB | Dependency: ROOT | Entity: | Morph: Tense=Past|VerbForm=Fin\n",
195
+ "Text: a | Part of Speech: DET | Dependency: det | Entity: | Morph: Definite=Ind|PronType=Art\n",
196
+ "Text: tiny | Part of Speech: ADJ | Dependency: amod | Entity: | Morph: Degree=Pos\n",
197
+ "Text: pickle | Part of Speech: NOUN | Dependency: dobj | Entity: | Morph: Number=Sing\n",
198
+ "Text: in | Part of Speech: ADP | Dependency: prep | Entity: | Morph: \n",
199
+ "Text: the | Part of Speech: DET | Dependency: det | Entity: | Morph: Definite=Def|PronType=Art\n",
200
+ "Text: jar | Part of Speech: NOUN | Dependency: pobj | Entity: | Morph: Number=Sing\n"
201
+ ]
202
+ }
203
+ ],
204
+ "source": [
205
+ "x = nlp(\"Who put a tiny pickle in the jar\")\n",
206
+ "text_pos(x)"
207
+ ]
208
+ },
209
+ {
210
+ "cell_type": "code",
211
+ "execution_count": 11,
212
+ "id": "2485d88d-2dd4-4fa3-9d62-4dcbec4e9138",
213
+ "metadata": {},
214
+ "outputs": [
215
+ {
216
+ "data": {
217
+ "text/plain": [
218
+ "0"
219
+ ]
220
+ },
221
+ "execution_count": 11,
222
+ "metadata": {},
223
+ "output_type": "execute_result"
224
+ }
225
+ ],
226
+ "source": [
227
+ "len(x[0].morph)"
228
+ ]
229
+ },
230
+ {
231
+ "cell_type": "code",
232
+ "execution_count": null,
233
+ "id": "013af6ac-f7d1-41d2-a601-b0f9a4870815",
234
+ "metadata": {},
235
+ "outputs": [],
236
+ "source": [
237
+ "#Instantiate a matcher and use it to test some patterns.\n",
238
+ "matcher = Matcher(nlp.vocab)\n",
239
+ "pattern = [{\"ENT_TYPE\": {\"IN\":[\"NORP\",\"GPE\"]}}]\n",
240
+ "matcher.add(\"proper_noun\", [pattern])\n",
241
+ "pattern_test = [{\"DEP\": \"amod\"},{\"DEP\":\"attr\"},{\"TEXT\":\"-\"},{\"DEP\":\"attr\",\"OP\":\"+\"}]\n",
242
+ "matcher.add(\"amod_attr\",[pattern_test])\n",
243
+ "pattern_an = [{\"DEP\": \"amod\"},{\"POS\":{\"IN\":[\"NOUN\",\"PROPN\"]}},{\"DEP\":{\"NOT_IN\":[\"attr\"]}}]\n",
244
+ "matcher.add(\"amod_noun\", [pattern_an])"
245
+ ]
246
+ },
247
+ {
248
+ "cell_type": "code",
249
+ "execution_count": null,
250
+ "id": "f6ac821d-7b56-446e-b9ca-42a5f5afd198",
251
+ "metadata": {},
252
+ "outputs": [],
253
+ "source": [
254
+ "def match_this(matcher, doc):\n",
255
+ " matches = matcher(doc)\n",
256
+ " for match_id, start, end in matches:\n",
257
+ " matched_span = doc[start:end]\n",
258
+ " print(f\"Mached {matched_span.text} by the rule {nlp.vocab.strings[match_id]}.\")\n",
259
+ " return matches"
260
+ ]
261
+ },
262
+ {
263
+ "cell_type": "code",
264
+ "execution_count": null,
265
+ "id": "958e4dc8-6652-4f32-b7ae-6aa5ee287cf7",
266
+ "metadata": {},
267
+ "outputs": [],
268
+ "source": [
269
+ "match_this(matcher, doct)"
270
+ ]
271
+ },
272
+ {
273
+ "cell_type": "code",
274
+ "execution_count": null,
275
+ "id": "5bf40fa5-b636-47f7-98b2-e872c78e7114",
276
+ "metadata": {},
277
+ "outputs": [],
278
+ "source": [
279
+ "text_pos(doc3)"
280
+ ]
281
+ },
282
+ {
283
+ "cell_type": "code",
284
+ "execution_count": null,
285
+ "id": "c5365304-5edb-428d-abf5-d579dcfbc269",
286
+ "metadata": {},
287
+ "outputs": [],
288
+ "source": [
289
+ "n_chunk(doc3)"
290
+ ]
291
+ },
292
+ {
293
+ "cell_type": "code",
294
+ "execution_count": null,
295
+ "id": "b7f3d3c8-65a1-433f-a47c-adcaaa2353e2",
296
+ "metadata": {},
297
+ "outputs": [],
298
+ "source": [
299
+ "displacy.render(doct, style=\"ent\")"
300
+ ]
301
+ },
302
+ {
303
+ "cell_type": "code",
304
+ "execution_count": null,
305
+ "id": "84df8e30-d142-4e5b-b3a9-02e3133ceba9",
306
+ "metadata": {},
307
+ "outputs": [],
308
+ "source": [
309
+ "txt = \"Savannah is a city in Georgia, in the United States\"\n",
310
+ "doc = nlp(txt)\n",
311
+ "displacy.render(doc, style=\"ent\")"
312
+ ]
313
+ },
314
+ {
315
+ "cell_type": "code",
316
+ "execution_count": null,
317
+ "id": "4a85f713-92bc-48ba-851e-de627d7e8c77",
318
+ "metadata": {},
319
+ "outputs": [],
320
+ "source": [
321
+ "displacy.render(doc2, style='dep')"
322
+ ]
323
+ },
324
+ {
325
+ "cell_type": "code",
326
+ "execution_count": null,
327
+ "id": "032f1134-7560-400b-824b-bc0196058b66",
328
+ "metadata": {},
329
+ "outputs": [],
330
+ "source": [
331
+ "n_chunk(doct)"
332
+ ]
333
+ },
334
+ {
335
+ "cell_type": "markdown",
336
+ "id": "188044a1-4cf4-4141-a520-c5f11198aed8",
337
+ "metadata": {},
338
+ "source": [
339
+ "* The Model does not recognize `wiccan` as a NORP but it will recognize `Wiccan` as NORP\n",
340
+ "* The Model does not know what to do with `-` and makes a mess of `trans-woman` because of this"
341
+ ]
342
+ },
343
+ {
344
+ "cell_type": "code",
345
+ "execution_count": null,
346
+ "id": "2dc82250-e26e-49d5-a7f2-d4eeda170e4e",
347
+ "metadata": {},
348
+ "outputs": [],
349
+ "source": [
350
+ "chunks = list(doc1.noun_chunks)\n",
351
+ "print(chunks[-1][-2].pos_)"
352
+ ]
353
+ },
354
+ {
355
+ "cell_type": "markdown",
356
+ "id": "c23d48c4-f5ab-4428-9244-0786e9903a8e",
357
+ "metadata": {},
358
+ "source": [
359
+ "## Building the Duct-Tape Pipeline cell-by-cell"
360
+ ]
361
+ },
362
+ {
363
+ "cell_type": "code",
364
+ "execution_count": null,
365
+ "id": "7ed22421-4401-482e-b54a-ee70d3187037",
366
+ "metadata": {},
367
+ "outputs": [],
368
+ "source": [
369
+ "#Lists of important words\n",
370
+ "gender = [\"man\", \"woman\",\"girl\",\"boy\",\"male\",\"female\",\"husband\",\"wife\",\"girlfriend\",\"boyfriend\",\"brother\",\"sister\",\"aunt\",\"uncle\",\"grandma\",\"grandpa\",\"granny\",\"granps\",\"grandmother\",\"grandfather\",\"mama\",\"dada\",\"Ma\",\"Pa\",\"lady\",\"gentleman\"]\n",
371
+ "#consider pulling ethnicities from https://github.com/cgio/global-ethnicities"
372
+ ]
373
+ },
374
+ {
375
+ "cell_type": "code",
376
+ "execution_count": null,
377
+ "id": "8b02a5d4-8a6b-4e5e-8f15-4f9182fe341f",
378
+ "metadata": {},
379
+ "outputs": [],
380
+ "source": [
381
+ "def select_crit(document, options=False, limelist=False):\n",
382
+ " '''This function is meant to select the critical part of a sentence. Critical, in this context means\n",
383
+ " the part of the sentence that is either: A) a PROPN from the correct entity group; B) an ADJ associated with a NOUN;\n",
384
+ " C) a NOUN that represents gender. It also checks this against what the model thinks is important if the user defines \"options\" as \"LIME\" or True.'''\n",
385
+ " chunks = list(document.noun_chunks)\n",
386
+ " pos_options = []\n",
387
+ " lime_options = []\n",
388
+ " \n",
389
+ " #Identify what the model cares about.\n",
390
+ " if options:\n",
391
+ " exp = explainer.explain_instance(document.text, predictor, num_features=20, num_samples=2000)\n",
392
+ " results = exp.as_list()[:10]\n",
393
+ " #prints the results from lime for QA.\n",
394
+ " if limelist == True:\n",
395
+ " print(results)\n",
396
+ " for feature in results:\n",
397
+ " lime_options.append(feature[0])\n",
398
+ " \n",
399
+ " #Identify what we care about \"parts of speech\"\n",
400
+ " for chunk in chunks:\n",
401
+ " #The use of chunk[-1] is due to testing that it appears to always match the root\n",
402
+ " root = chunk[-1]\n",
403
+ " #This currently matches to a list I've created. I don't know the best way to deal with this so I'm leaving it as is for the moment.\n",
404
+ " if root.text.lower() in gender:\n",
405
+ " cur_values = [token.text for token in chunk if token.pos_ in [\"NOUN\",\"ADJ\"]]\n",
406
+ " if (all(elem in lime_options for elem in cur_values) and ((options == \"LIME\") or (options == True))) or ((options != \"LIME\") and (options != True)):\n",
407
+ " pos_options.extend(cur_values)\n",
408
+ " #print(f\"From {chunk.text}, {cur_values} added to pos_options due to gender.\") #for QA\n",
409
+ " #This is currently set to pick up entities in a particular set of groups (which I recently expanded). Should it just pick up all named entities?\n",
410
+ " elif root.ent_type_ in [\"GPE\",\"NORP\",\"DATE\",\"EVENT\"]:\n",
411
+ " cur_values = []\n",
412
+ " if (len(chunk) > 1) and (chunk[-2].dep_ == \"compound\"):\n",
413
+ " #creates the compound element of the noun\n",
414
+ " compound = [x.text for x in chunk if x.dep_ == \"compound\"]\n",
415
+ " print(f\"This is the contents of {compound} and it is {all(elem in lime_options for elem in compound)} that all elements are present in {lime_options}.\") #for QA\n",
416
+ " #checks to see all elements in the compound are important to the model or use the compound if not checking importance.\n",
417
+ " if (all(elem in lime_options for elem in compound) and ((options == \"LIME\") or (options == True))) or ((options != \"LIME\") and (options != True)):\n",
418
+ " #creates a span for the entirety of the compound noun and adds it to the list.\n",
419
+ " span = -1 * (1 + len(compound))\n",
420
+ " pos_options.append(chunk[span:].text)\n",
421
+ " cur_values + [token.text for token in chunk if token.pos_ == \"ADJ\"]\n",
422
+ " else: \n",
423
+ " cur_values = [token.text for token in chunk if (token.ent_type_ in [\"GPE\",\"NORP\",\"DATE\",\"EVENT\"]) or (token.pos_ == \"ADJ\")]\n",
424
+ " if (all(elem in lime_options for elem in cur_values) and ((options == \"LIME\") or (options == True))) or ((options != \"LIME\") and (options != True)):\n",
425
+ " pos_options.extend(cur_values)\n",
426
+ " print(f\"From {chunk.text}, {cur_values} and {pos_options} added to pos_options due to entity recognition.\") #for QA\n",
427
+ " elif len(chunk) > 1:\n",
428
+ " cur_values = [token.text for token in chunk if token.pos_ in [\"NOUN\",\"ADJ\"]]\n",
429
+ " if (all(elem in lime_options for elem in cur_values) and ((options == \"LIME\") or (options == True))) or ((options != \"LIME\") and (options != True)):\n",
430
+ " pos_options.extend(cur_values)\n",
431
+ " print(f\"From {chunk.text}, {cur_values} added to pos_options due to wildcard.\") #for QA\n",
432
+ " else:\n",
433
+ " print(f\"No options added for \\'{chunk.text}\\' \")\n",
434
+ " \n",
435
+ " #Return the correct set of options based on user input, defaults to POS for simplicity.\n",
436
+ " if options == \"LIME\":\n",
437
+ " return lime_options\n",
438
+ " else:\n",
439
+ " return pos_options"
440
+ ]
441
+ },
442
+ {
443
+ "cell_type": "code",
444
+ "execution_count": null,
445
+ "id": "fa95e9fe-36ea-4b95-ab51-6bb82f745c23",
446
+ "metadata": {},
447
+ "outputs": [],
448
+ "source": [
449
+ "#Testing a method to make sure I had the ability to match one list inside the other. Now incorporated in the above function's logic.\n",
450
+ "one = ['a','b','c']\n",
451
+ "two = ['a','c']\n",
452
+ "all(elem in one for elem in two)"
453
+ ]
454
+ },
455
+ {
456
+ "cell_type": "code",
457
+ "execution_count": null,
458
+ "id": "d43e202e-64b9-4cea-b117-82492c9ee5f4",
459
+ "metadata": {},
460
+ "outputs": [],
461
+ "source": [
462
+ "#Test to make sure all three options work\n",
463
+ "pos4 = select_crit(doc4)\n",
464
+ "lime4 = select_crit(doc4,options=\"LIME\")\n",
465
+ "final4 = select_crit(doc4,options=True,limelist=True)\n",
466
+ "print(pos4, lime4, final4)"
467
+ ]
468
+ },
469
+ {
470
+ "cell_type": "code",
471
+ "execution_count": null,
472
+ "id": "5623015e-fdb2-44f0-b5ac-812203b639b3",
473
+ "metadata": {},
474
+ "outputs": [],
475
+ "source": [
476
+ "#This is a test to make sure compounds of any length are captured. \n",
477
+ "txt = \"I went to Papua New Guinea for Christmas Eve and New Years.\"\n",
478
+ "doc_t = nlp(txt)\n",
479
+ "select_crit(doc_t)"
480
+ ]
481
+ },
482
+ {
483
+ "cell_type": "code",
484
+ "execution_count": null,
485
+ "id": "58be22eb-a5c3-4a01-820b-45d190fce52d",
486
+ "metadata": {},
487
+ "outputs": [],
488
+ "source": [
489
+ "#Test to make sure all three options work. A known issue is that if we combine the compounds then they will not end up in the final_options...\n",
490
+ "pos_t = select_crit(doc_t)\n",
491
+ "lime_t = select_crit(doc_t,options=\"LIME\")\n",
492
+ "final_t = select_crit(doc_t,options=True,limelist=True)\n",
493
+ "print(pos_t, lime_t, final_t)"
494
+ ]
495
+ },
496
+ {
497
+ "cell_type": "code",
498
+ "execution_count": null,
499
+ "id": "1158de94-1472-4001-b3a1-42a488bcb20f",
500
+ "metadata": {},
501
+ "outputs": [],
502
+ "source": [
503
+ "select_crit(doc_t,options=True)"
504
+ ]
505
+ },
506
+ {
507
+ "cell_type": "markdown",
508
+ "id": "05063ede-422f-4536-8408-ceb5441adbe8",
509
+ "metadata": {},
510
+ "source": [
511
+ "> Note `Papua` and `Eve` have such low impact on the model that they do not always appear... so there will always be limitations to matching."
512
+ ]
513
+ },
514
+ {
515
+ "cell_type": "code",
516
+ "execution_count": null,
517
+ "id": "2c7c1ca9-4962-4fbe-b18b-1e20a223aff9",
518
+ "metadata": {},
519
+ "outputs": [],
520
+ "source": [
521
+ "select_crit(doc_t,options=\"LIME\")"
522
+ ]
523
+ },
524
+ {
525
+ "cell_type": "code",
526
+ "execution_count": null,
527
+ "id": "c70387a5-c431-43a5-a3b8-7533268a94e3",
528
+ "metadata": {},
529
+ "outputs": [],
530
+ "source": [
531
+ "displacy.render(doc_t, style=\"ent\")"
532
+ ]
533
+ },
534
+ {
535
+ "cell_type": "code",
536
+ "execution_count": null,
537
+ "id": "4b92d276-7d67-4c1c-940b-d3b2dcc756b9",
538
+ "metadata": {},
539
+ "outputs": [],
540
+ "source": [
541
+ "#This run clearly indicates that this pipeline from spaCy does not know what to do with hyphens(\"-\") and that we need to be aware of that.\n",
542
+ "choices = select_crit(doct)\n",
543
+ "choices"
544
+ ]
545
+ },
546
+ {
547
+ "cell_type": "code",
548
+ "execution_count": null,
549
+ "id": "ea6b29d0-d0fa-4eb3-af9c-970759124145",
550
+ "metadata": {},
551
+ "outputs": [],
552
+ "source": [
553
+ "user_choice = choices[2]\n",
554
+ "matcher2 = Matcher(nlp.vocab)\n",
555
+ "pattern = [{\"TEXT\": user_choice}]\n",
556
+ "matcher2.add(\"user choice\", [pattern])"
557
+ ]
558
+ },
559
+ {
560
+ "cell_type": "code",
561
+ "execution_count": null,
562
+ "id": "d32754b8-f1fa-4781-a6b0-829ad7ec2e50",
563
+ "metadata": {},
564
+ "outputs": [],
565
+ "source": [
566
+ "#consider using https://github.com/writerai/replaCy instead\n",
567
+ "match_id, start, end = match_this(matcher2,doc2)[0]"
568
+ ]
569
+ },
570
+ {
571
+ "cell_type": "code",
572
+ "execution_count": null,
573
+ "id": "a0362734-020b-49ad-b566-fdc7196e705c",
574
+ "metadata": {},
575
+ "outputs": [],
576
+ "source": [
577
+ "docx = doc2.text.replace(user_choice,\"man\")\n",
578
+ "docx"
579
+ ]
580
+ },
581
+ {
582
+ "cell_type": "markdown",
583
+ "id": "bf0512b6-336e-4842-9bde-34e03a1ca7c6",
584
+ "metadata": {},
585
+ "source": [
586
+ "### Testing predictions and visualization\n",
587
+ "Here I will attempt to import the model from Hugging Face, generate predictions for each of the sentences, and then visualize those predictions in a dot plot. If I can get this to work then I will move on to testing a full pipeline for letting the user pick which part of the sentence they wish to generate counterfactuals for."
588
+ ]
589
+ },
590
+ {
591
+ "cell_type": "code",
592
+ "execution_count": null,
593
+ "id": "e0bd4134-3b22-4ae8-870c-3a66c1cf8b23",
594
+ "metadata": {},
595
+ "outputs": [],
596
+ "source": [
597
+ "#Testing to see how to get predictions from the model. Ultimately, this did not work.\n",
598
+ "token = tokenizer(upt4, return_tensors=\"pt\")\n",
599
+ "labels = torch.tensor([1]).unsqueeze(0) # Batch size 1\n",
600
+ "outputs = model(**token, labels=labels)"
601
+ ]
602
+ },
603
+ {
604
+ "cell_type": "code",
605
+ "execution_count": null,
606
+ "id": "74c639bb-e74a-4a46-8047-3552265ae6a4",
607
+ "metadata": {},
608
+ "outputs": [],
609
+ "source": [
610
+ "#Discovering that there's a pipeline specifically to provide scores. \n",
611
+ "#I used it to get a list of lists of dictionaries that I can then manipulate to calculate the proper prediction score.\n",
612
+ "pipe = TextClassificationPipeline(model=model, tokenizer=tokenizer, return_all_scores=True)"
613
+ ]
614
+ },
615
+ {
616
+ "cell_type": "code",
617
+ "execution_count": null,
618
+ "id": "8e1ff15d-0fb9-475b-bd24-4548c0782343",
619
+ "metadata": {},
620
+ "outputs": [],
621
+ "source": [
622
+ "preds = pipe(upt4)\n",
623
+ "print(preds[0][0])"
624
+ ]
625
+ },
626
+ {
627
+ "cell_type": "code",
628
+ "execution_count": null,
629
+ "id": "d8abb9ca-36cf-441a-9236-1f7e44331b53",
630
+ "metadata": {},
631
+ "outputs": [],
632
+ "source": [
633
+ "score_1 = preds[0][0]['score']\n",
634
+ "score_2 = (score_1 - .5) * 2\n",
635
+ "print(score_1, score_2)"
636
+ ]
637
+ },
638
+ {
639
+ "cell_type": "code",
640
+ "execution_count": null,
641
+ "id": "8726a284-99bd-47f1-9756-1c3ae603db10",
642
+ "metadata": {},
643
+ "outputs": [],
644
+ "source": [
645
+ "def eval_pred(text):\n",
646
+ " '''A basic function for evaluating the prediction from the model and turning it into a visualization friendly number.'''\n",
647
+ " preds = pipe(text)\n",
648
+ " neg_score = preds[0][0]['score']\n",
649
+ " pos_score = preds[0][1]['score']\n",
650
+ " if pos_score >= neg_score:\n",
651
+ " return pos_score\n",
652
+ " if neg_score >= pos_score:\n",
653
+ " return -1 * neg_score"
654
+ ]
655
+ },
656
+ {
657
+ "cell_type": "code",
658
+ "execution_count": null,
659
+ "id": "f38f5061-f30a-4c81-9465-37951c3ad9f4",
660
+ "metadata": {},
661
+ "outputs": [],
662
+ "source": [
663
+ "def eval_pred_test(text, return_all = False):\n",
664
+ " '''A basic function for evaluating the prediction from the model and turning it into a visualization friendly number.'''\n",
665
+ " preds = pipe(text)\n",
666
+ " neg_score = -1 * preds[0][0]['score']\n",
667
+ " sent_neg = preds[0][0]['label']\n",
668
+ " pos_score = preds[0][1]['score']\n",
669
+ " sent_pos = preds[0][1]['label']\n",
670
+ " prediction = 0\n",
671
+ " sentiment = ''\n",
672
+ " if pos_score > abs(neg_score):\n",
673
+ " prediction = pos_score\n",
674
+ " sentiment = sent_pos\n",
675
+ " elif abs(neg_score) > pos_score:\n",
676
+ " prediction = neg_score\n",
677
+ " sentiment = sent_neg\n",
678
+ " \n",
679
+ " if return_all:\n",
680
+ " return prediction, sentiment\n",
681
+ " else:\n",
682
+ " return prediction"
683
+ ]
684
+ },
685
+ {
686
+ "cell_type": "code",
687
+ "execution_count": null,
688
+ "id": "abd5dd8c-8cff-4865-abf1-f5a744f2203b",
689
+ "metadata": {},
690
+ "outputs": [],
691
+ "source": [
692
+ "score = eval_pred(upt4)\n",
693
+ "og_data = {'Country': ['Iraq'], 'Continent': ['Asia'], 'text':[upt4], 'pred':[score]}\n",
694
+ "og_df = pd.DataFrame(og_data)\n",
695
+ "og_df"
696
+ ]
697
+ },
698
+ {
699
+ "cell_type": "markdown",
700
+ "id": "8b349a87-fe83-4045-a63a-d054489bb461",
701
+ "metadata": {},
702
+ "source": [
703
+ "## Load the dummy countries I created to test generating counterfactuals\n",
704
+ "I decided to test the pipeline with a known problem space. Taking the text from Aurélien Géron's observations on Twitter, I built a small-scale test using what I had learned to prove that we can identify a particular part of speech, use it to generate counterfactuals, and then build a visualization from them."
705
+ ]
706
+ },
707
+ {
708
+ "cell_type": "code",
709
+ "execution_count": null,
710
+ "id": "46ab3332-964c-449f-8cef-a9ff7df397a4",
711
+ "metadata": {},
712
+ "outputs": [],
713
+ "source": [
714
+ "#load my test data from https://github.com/dbouquin/IS_608/blob/master/NanosatDB_munging/Countries-Continents.csv\n",
715
+ "df = pd.read_csv(\"Assets/Countries/countries.csv\")\n",
716
+ "df.head()"
717
+ ]
718
+ },
719
+ {
720
+ "cell_type": "code",
721
+ "execution_count": null,
722
+ "id": "51c75894-80af-4625-8ce8-660e500b496b",
723
+ "metadata": {},
724
+ "outputs": [],
725
+ "source": [
726
+ "#Note: we will need to build the function that lets the user choose from the options available. For now I have hard coded it as \"selection\", from \"user_options\".\n",
727
+ "user_options = select_crit(doc4)\n",
728
+ "print(user_options)\n",
729
+ "selection = user_options[1]\n",
730
+ "selection"
731
+ ]
732
+ },
733
+ {
734
+ "cell_type": "code",
735
+ "execution_count": null,
736
+ "id": "3d6419f1-bf7d-44bc-afb8-ac26ef9002df",
737
+ "metadata": {},
738
+ "outputs": [],
739
+ "source": [
740
+ "#Create a function that generates the counterfactuals within a data frame.\n",
741
+ "def gen_cf_country(df,document,selection):\n",
742
+ " df['text'] = df.Country.apply(lambda x: document.text.replace(selection,x))\n",
743
+ " df['prediction'] = df.text.apply(eval_pred_test)\n",
744
+ " #added this because I think it will make the end results better if we ensure the seed is in the data we generate counterfactuals from.\n",
745
+ " df['seed'] = df.Country.apply(lambda x: 'seed' if x == selection else 'alternative')\n",
746
+ " return df\n",
747
+ "\n",
748
+ "df = gen_cf_country(df,doc4,selection)\n",
749
+ "df.head()"
750
+ ]
751
+ },
752
+ {
753
+ "cell_type": "code",
754
+ "execution_count": null,
755
+ "id": "aec241a6-48c3-48c6-9e7f-d22612eaedff",
756
+ "metadata": {},
757
+ "outputs": [],
758
+ "source": [
759
+ "#Display Counterfactuals and Original in a layered chart. I couldn't get this to provide a legend.\n",
760
+ "og = alt.Chart(og_df).encode(\n",
761
+ " x='Continent:N',\n",
762
+ " y='pred:Q'\n",
763
+ ").mark_square(color='green', size = 200, opacity=.5)\n",
764
+ "\n",
765
+ "cf = alt.Chart(df).encode(\n",
766
+ " x='Continent:N', # specify nominal data\n",
767
+ " y='prediction:Q', # specify quantitative data\n",
768
+ ").mark_circle(color='blue', size=50, opacity =.25)\n",
769
+ "\n",
770
+ "alt_plot = alt.LayerChart(layer=[cf,og], width = 300)\n",
771
+ "alt_plot"
772
+ ]
773
+ },
774
+ {
775
+ "cell_type": "code",
776
+ "execution_count": null,
777
+ "id": "ecb9dd41-2fab-49bd-bae5-30300ce39e41",
778
+ "metadata": {},
779
+ "outputs": [],
780
+ "source": [
781
+ "single_nearest = alt.selection_single(on='mouseover', nearest=True)\n",
782
+ "full = alt.Chart(df).encode(\n",
783
+ " alt.X('Continent:N'), # specify nominal data\n",
784
+ " alt.Y('prediction:Q'), # specify quantitative data\n",
785
+ " color=alt.Color('seed:N', legend=alt.Legend(title=\"Seed or Alternative\")),\n",
786
+ " size=alt.Size('seed:N', alt.scale(domain=[50,100])),\n",
787
+ " tooltip=('Country','prediction')\n",
788
+ ").mark_circle(opacity=.5).properties(width=300).add_selection(single_nearest)\n",
789
+ "\n",
790
+ "full"
791
+ ]
792
+ },
793
+ {
794
+ "cell_type": "code",
795
+ "execution_count": null,
796
+ "id": "56bc30d7-03a5-43ff-9dfe-878197628305",
797
+ "metadata": {},
798
+ "outputs": [],
799
+ "source": [
800
+ "df2 = df.nlargest(5, 'prediction')\n",
801
+ "df3 = df.nsmallest(5, 'prediction')\n",
802
+ "frames = [df2,df3]\n",
803
+ "results = pd.concat(frames)"
804
+ ]
805
+ },
806
+ {
807
+ "cell_type": "code",
808
+ "execution_count": null,
809
+ "id": "1610bb48-c9b9-4bee-bcb5-999886acb9e3",
810
+ "metadata": {},
811
+ "outputs": [],
812
+ "source": [
813
+ "bar = alt.Chart(results).encode( \n",
814
+ " alt.X('prediction:Q'), \n",
815
+ " alt.Y('Country:N', sort=\"-x\"),\n",
816
+ " color=alt.Color('seed:N', legend=alt.Legend(title=\"Seed or Alternative\")),\n",
817
+ " size='seed:N',\n",
818
+ " tooltip=('Country','prediction')\n",
819
+ ").mark_circle().properties(width=300).add_selection(single_nearest)\n",
820
+ "\n",
821
+ "bar"
822
+ ]
823
+ },
824
+ {
825
+ "cell_type": "markdown",
826
+ "id": "84c40b74-95be-4c19-bd57-74e6004b950c",
827
+ "metadata": {},
828
+ "source": [
829
+ "#### QA"
830
+ ]
831
+ },
832
+ {
833
+ "cell_type": "code",
834
+ "execution_count": null,
835
+ "id": "7d15c7d8-9fdb-4c5b-84fa-599839cbceac",
836
+ "metadata": {},
837
+ "outputs": [],
838
+ "source": [
839
+ "qa_txt = \"They serve halal food in Iraq and Egypt.\"\n",
840
+ "qa_doc = nlp(qa_txt)"
841
+ ]
842
+ },
843
+ {
844
+ "cell_type": "code",
845
+ "execution_count": null,
846
+ "id": "d6956ddf-9287-419a-bb08-a3618f77700a",
847
+ "metadata": {},
848
+ "outputs": [],
849
+ "source": [
850
+ "displacy.render(qa_doc, style=\"dep\")"
851
+ ]
852
+ },
853
+ {
854
+ "cell_type": "code",
855
+ "execution_count": null,
856
+ "id": "88768d68-fe44-49ab-ac12-d41e6716b3b3",
857
+ "metadata": {},
858
+ "outputs": [],
859
+ "source": [
860
+ "select_crit(qa_doc)"
861
+ ]
862
+ },
863
+ {
864
+ "cell_type": "markdown",
865
+ "id": "7bbc6c2e-df5d-4076-8532-8648fd818be4",
866
+ "metadata": {},
867
+ "source": [
868
+ "# NLC-Gen\n",
869
+ "### A Natural Language Counterfactual Generator for Exploring Bias in Sentiment Analysis Algorithms\n",
870
+ "\n",
871
+ "##### Overview\n",
872
+ "This project is an extension of [Interactive Model Cards](https://github.com/amcrisan/interactive-model-cards). It focuses on providing a person more ways to explore the bias of a model through the generation of alternatives (technically [counterfactuals](https://plato.stanford.edu/entries/counterfactuals/#WhatCoun)). We believe that, through the use of alternatives, people can better understand the limitations of a model and develop productive skepticism around its usage and trustworthiness.\n",
873
+ "\n",
874
+ "##### Set up\n",
875
+ "\n",
876
+ "Download the files from GitHub, then run the commands below in a terminal:\n",
877
+ "```sh\n",
878
+ "cd NLC-Gen\n",
879
+ "pipenv install\n",
880
+ "pipenv shell\n",
881
+ "python -m spacy download en_core_web_lg\n",
882
+ "streamlit run NLC-app.py\n",
883
+ "```\n",
884
+ "\n",
885
+ "##### Known Limitations\n",
886
+ "* Words not in the spaCy vocab for `en_core_web_lg` won't have vectors and so won't have the ability to create similarity scores.\n",
887
+ "* WordNet has many limitations due to its age and lack of funding for ongoing maintenance. It covers a large portion of the English language, but certain words simply do not exist in it.\n",
888
+ "* There are currently only 2 lists (Countries and Professions). We would like to find community curated lists for: Race, Sexual Orientation and Gender Identity (SOGI), Religion, age, and protected status.\n",
889
+ "\n",
890
+ "\n",
891
+ "##### Key Dependencies and Packages\n",
892
+ "\n",
893
+ "1. [Hugging Face Transformers](https://huggingface.co/) - The model we've designed this iteration for is hosted on Hugging Face. It is: [distilbert-base-uncased-finetuned-sst-2-english](https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).\n",
894
+ "2. [Streamlit](https://streamlit.io) - This is the library we're using to build the prototype app because it is easy to stand up and quick to fix.\n",
895
+ "3. [spaCy](https://spacy.io) - This is the main NLP Library we're using and it runs most of the text manipulation we're doing as part of the project.\n",
896
+ "4. [NLTK + WordNet](https://www.nltk.org/howto/wordnet.html) - This is the initial lexical database we're using because it is accessible directly through Python and it is free. We will be considering a move to [ConceptNet](https://conceptnet.io/) for future iterations based on better lateral movement across edges.\n",
897
+ "5. [Lime](https://github.com/marcotcr/lime) - We chose Lime over Shap because Lime has more of the functionality we need. Shap appears to provide greater performance but is not as easily suited to our original designs.\n",
898
+ "6. [Altair](https://altair-viz.github.io/user_guide/encoding.html) - We're using Altair because it's well integrated into Streamlit.\n",
899
+ "\n",
900
+ "\n",
901
+ "\n"
902
+ ]
903
+ },
904
+ {
905
+ "cell_type": "code",
906
+ "execution_count": null,
907
+ "id": "fa224bed-3630-4485-8dbc-670aaf5e6b0a",
908
+ "metadata": {},
909
+ "outputs": [],
910
+ "source": []
911
+ }
912
+ ],
913
+ "metadata": {
914
+ "kernelspec": {
915
+ "display_name": "Python 3 (ipykernel)",
916
+ "language": "python",
917
+ "name": "python3"
918
+ },
919
+ "language_info": {
920
+ "codemirror_mode": {
921
+ "name": "ipython",
922
+ "version": 3
923
+ },
924
+ "file_extension": ".py",
925
+ "mimetype": "text/x-python",
926
+ "name": "python",
927
+ "nbconvert_exporter": "python",
928
+ "pygments_lexer": "ipython3",
929
+ "version": "3.8.8"
930
+ }
931
+ },
932
+ "nbformat": 4,
933
+ "nbformat_minor": 5
934
+ }
VizNLC-gen-pipeline.ipynb ADDED
@@ -0,0 +1,1175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "8ea54fcd-ef4a-42cb-ae26-cbdc6f6ffc64",
6
+ "metadata": {
7
+ "tags": []
8
+ },
9
+ "source": [
10
+ "# Duct Tape Pipeline\n",
11
+ "To explore how users may interact with interactive visualizations of counterfactuals for evolving the Interactive Model Card, we will need to first find a way to generate counterfactuals based on a given input. We want the user to be able to provide their input and direct the system to generate counterfactuals based on a part of speech that is significant to the model. The system should then provide a data frame of counterfactuals to be used in an interactive visualization. Below is an example wireframe of the experience based on previous research.\n",
12
+ "\n",
13
+ "![wireframe](Assets/VizNLC-Wireframe-example.png)\n",
14
+ "\n",
15
+ "## Goals of this notebook\n",
16
+ "* Clean up the flow in the \"duct tape pipeline\".\n",
17
+ "* See if I can extract the LIME list for visualization"
18
+ ]
19
+ },
20
+ {
21
+ "cell_type": "markdown",
22
+ "id": "736e6375-dd6d-4188-b8b1-92bded2bcd02",
23
+ "metadata": {},
24
+ "source": [
25
+ "## Loading the libraries and models"
26
+ ]
27
+ },
28
+ {
29
+ "cell_type": "code",
30
+ "execution_count": 1,
31
+ "id": "7f581785-e642-4f74-9f67-06a63820eaf2",
32
+ "metadata": {},
33
+ "outputs": [],
34
+ "source": [
35
+ "#Import the libraries we know we'll need for the Generator.\n",
36
+ "import pandas as pd, spacy, nltk, numpy as np\n",
37
+ "from spacy import displacy\n",
38
+ "from spacy.matcher import Matcher\n",
39
+ "#!python -m spacy download en_core_web_md\n",
40
+ "nlp = spacy.load(\"en_core_web_md\")\n",
41
+ "lemmatizer = nlp.get_pipe(\"lemmatizer\")\n",
42
+ "\n",
43
+ "#Import the libraries to support the model, predictions, and LIME.\n",
44
+ "from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline\n",
45
+ "import lime\n",
46
+ "import torch\n",
47
+ "import torch.nn.functional as F\n",
48
+ "from lime.lime_text import LimeTextExplainer\n",
49
+ "\n",
50
+ "#Import the libraries for generating interactive visualizations.\n",
51
+ "import altair as alt"
52
+ ]
53
+ },
54
+ {
55
+ "cell_type": "code",
56
+ "execution_count": 2,
57
+ "id": "cbe2b292-e33e-4915-8e61-bba5327fb643",
58
+ "metadata": {},
59
+ "outputs": [],
60
+ "source": [
61
+ "#Defining all necessary variables and instances.\n",
62
+ "tokenizer = AutoTokenizer.from_pretrained(\"distilbert-base-uncased-finetuned-sst-2-english\")\n",
63
+ "model = AutoModelForSequenceClassification.from_pretrained(\"distilbert-base-uncased-finetuned-sst-2-english\")\n",
64
+ "class_names = ['negative', 'positive']\n",
65
+ "explainer = LimeTextExplainer(class_names=class_names)"
66
+ ]
67
+ },
68
+ {
69
+ "cell_type": "code",
70
+ "execution_count": 3,
71
+ "id": "197c3e26-0fdf-49c6-9135-57f1fd55d3e3",
72
+ "metadata": {},
73
+ "outputs": [],
74
+ "source": [
75
+ "#Defining a Predictor required for LIME to function.\n",
76
+ "def predictor(texts):\n",
77
+ " outputs = model(**tokenizer(texts, return_tensors=\"pt\", padding=True))\n",
78
+ " probas = F.softmax(outputs.logits, dim=1).detach().numpy()\n",
79
+ " return probas"
80
+ ]
81
+ },
82
+ {
83
+ "cell_type": "code",
84
+ "execution_count": 4,
85
+ "id": "013af6ac-f7d1-41d2-a601-b0f9a4870815",
86
+ "metadata": {},
87
+ "outputs": [],
88
+ "source": [
89
+ "#Instantiate a matcher and use it to test some patterns.\n",
90
+ "matcher = Matcher(nlp.vocab)\n",
91
+ "pattern = [{\"ENT_TYPE\": {\"IN\":[\"NORP\",\"GPE\"]}}]\n",
92
+ "matcher.add(\"proper_noun\", [pattern])\n",
93
+ "pattern_test = [{\"DEP\": \"amod\"},{\"DEP\":\"attr\"},{\"TEXT\":\"-\"},{\"DEP\":\"attr\",\"OP\":\"+\"}]\n",
94
+ "matcher.add(\"amod_attr\",[pattern_test])\n",
95
+ "pattern_an = [{\"DEP\": \"amod\"},{\"POS\":{\"IN\":[\"NOUN\",\"PROPN\"]}},{\"DEP\":{\"NOT_IN\":[\"attr\"]}}]\n",
96
+ "matcher.add(\"amod_noun\", [pattern_an])"
97
+ ]
98
+ },
99
+ {
100
+ "cell_type": "code",
101
+ "execution_count": 5,
102
+ "id": "f6ac821d-7b56-446e-b9ca-42a5f5afd198",
103
+ "metadata": {},
104
+ "outputs": [],
105
+ "source": [
106
+ "def match_this(matcher, doc):\n",
107
+ " matches = matcher(doc)\n",
108
+ " for match_id, start, end in matches:\n",
109
+ " matched_span = doc[start:end]\n",
110
+ " print(f\"Mached {matched_span.text} by the rule {nlp.vocab.strings[match_id]}.\")\n",
111
+ " return matches"
112
+ ]
113
+ },
114
+ {
115
+ "cell_type": "markdown",
116
+ "id": "c23d48c4-f5ab-4428-9244-0786e9903a8e",
117
+ "metadata": {
118
+ "tags": []
119
+ },
120
+ "source": [
121
+ "## Building the Duct-Tape Pipeline cell-by-cell"
122
+ ]
123
+ },
124
+ {
125
+ "cell_type": "code",
126
+ "execution_count": 6,
127
+ "id": "a373fc00-401a-4def-9f09-de73d485ac13",
128
+ "metadata": {},
129
+ "outputs": [],
130
+ "source": [
131
+ "gender = [\"man\", \"woman\",\"girl\",\"boy\",\"male\",\"female\",\"husband\",\"wife\",\"girlfriend\",\"boyfriend\",\"brother\",\"sister\",\"aunt\",\"uncle\",\"grandma\",\"grandpa\",\"granny\",\"granps\",\"grandmother\",\"grandfather\",\"mama\",\"dada\",\"Ma\",\"Pa\",\"lady\",\"gentleman\"]"
132
+ ]
133
+ },
134
+ {
135
+ "cell_type": "code",
136
+ "execution_count": 7,
137
+ "id": "8b02a5d4-8a6b-4e5e-8f15-4f9182fe341f",
138
+ "metadata": {},
139
+ "outputs": [],
140
+ "source": [
141
+ "def select_crit(document, options=False, limelist=False):\n",
142
+ " '''This function is meant to select the critical part of a sentence. Critical, in this context means\n",
143
+ " the part of the sentence that is either: A) a PROPN from the correct entity group; B) an ADJ associated with a NOUN;\n",
144
+ " C) a NOUN that represents gender. It also checks this against what the model thinks is important if the user defines \"options\" as \"LIME\" or True.'''\n",
145
+ " chunks = list(document.noun_chunks)\n",
146
+ " pos_options = []\n",
147
+ " lime_options = []\n",
148
+ " \n",
149
+ " #Identify what the model cares about.\n",
150
+ " if options:\n",
151
+ " exp = explainer.explain_instance(document.text, predictor, num_features=15, num_samples=2000)\n",
152
+ " lime_results = exp.as_list()\n",
153
+ " #prints the results from lime for QA.\n",
154
+ " if limelist == True:\n",
155
+ " print(lime_results)\n",
156
+ " for feature in lime_results:\n",
157
+ " lime_options.append(feature[0])\n",
158
+ " lime_results = pd.DataFrame(lime_results, columns=[\"Word\",\"Weight\"])\n",
159
+ " \n",
160
+ " #Identify what we care about \"parts of speech\"\n",
161
+ " for chunk in chunks:\n",
162
+ " #The use of chunk[-1] is due to testing that it appears to always match the root\n",
163
+ " root = chunk[-1]\n",
164
+ " #This currently matches to a list I've created. I don't know the best way to deal with this so I'm leaving it as is for the moment.\n",
165
+ " if root.text.lower() in gender:\n",
166
+ " cur_values = [token.text for token in chunk if token.pos_ in [\"NOUN\",\"ADJ\"]]\n",
167
+ " if (all(elem in lime_options for elem in cur_values) and ((options == \"LIME\") or (options == True))) or ((options != \"LIME\") and (options != True)):\n",
168
+ " pos_options.extend(cur_values)\n",
169
+ " #print(f\"From {chunk.text}, {cur_values} added to pos_options due to gender.\") #for QA\n",
170
+ " #This is currently set to pick up entities in a particular set of groups (which I recently expanded). Should it just pick up all named entities?\n",
171
+ " elif root.ent_type_ in [\"GPE\",\"NORP\",\"DATE\",\"EVENT\"]:\n",
172
+ " cur_values = []\n",
173
+ " if (len(chunk) > 1) and (chunk[-2].dep_ == \"compound\"):\n",
174
+ " #creates the compound element of the noun\n",
175
+ " compound = [x.text for x in chunk if x.dep_ == \"compound\"]\n",
176
+ " print(f\"This is the contents of {compound} and it is {all(elem in lime_options for elem in compound)} that all elements are present in {lime_options}.\") #for QA\n",
177
+ " #checks to see all elements in the compound are important to the model or use the compound if not checking importance.\n",
178
+ " if (all(elem in lime_options for elem in compound) and ((options == \"LIME\") or (options == True))) or ((options != \"LIME\") and (options != True)):\n",
179
+ " #creates a span for the entirety of the compound noun and adds it to the list.\n",
180
+ " span = -1 * (1 + len(compound))\n",
181
+ " pos_options.append(chunk[span:].text)\n",
182
+ " cur_values + [token.text for token in chunk if token.pos_ == \"ADJ\"]\n",
183
+ " else: \n",
184
+ " cur_values = [token.text for token in chunk if (token.ent_type_ in [\"GPE\",\"NORP\",\"DATE\",\"EVENT\"]) or (token.pos_ == \"ADJ\")]\n",
185
+ " if (all(elem in lime_options for elem in cur_values) and ((options == \"LIME\") or (options == True))) or ((options != \"LIME\") and (options != True)):\n",
186
+ " pos_options.extend(cur_values)\n",
187
+ " print(f\"From {chunk.text}, {cur_values} and {pos_options} added to pos_options due to entity recognition.\") #for QA\n",
188
+ " elif len(chunk) > 1:\n",
189
+ " cur_values = [token.text for token in chunk if token.pos_ in [\"NOUN\",\"ADJ\"]]\n",
190
+ " if (all(elem in lime_options for elem in cur_values) and ((options == \"LIME\") or (options == True))) or ((options != \"LIME\") and (options != True)):\n",
191
+ " pos_options.extend(cur_values)\n",
192
+ " print(f\"From {chunk.text}, {cur_values} added to pos_options due to wildcard.\") #for QA\n",
193
+ " else:\n",
194
+ " print(f\"No options added for \\'{chunk.text}\\' \")\n",
195
+ " \n",
196
+ " \n",
197
+ " #Return the correct set of options based on user input, defaults to POS for simplicity.\n",
198
+ " if options == \"LIME\":\n",
199
+ " return pos_options, lime_results\n",
200
+ " else:\n",
201
+ " return pos_options"
202
+ ]
203
+ },
204
+ {
205
+ "cell_type": "code",
206
+ "execution_count": 8,
207
+ "id": "d43e202e-64b9-4cea-b117-82492c9ee5f4",
208
+ "metadata": {},
209
+ "outputs": [
210
+ {
211
+ "name": "stdout",
212
+ "output_type": "stream",
213
+ "text": [
214
+ "From This film, ['film'] added to pos_options due to wildcard.\n",
215
+ "From Iraq, ['Iraq'] and ['film', 'Iraq'] added to pos_options due to entity recognition.\n"
216
+ ]
217
+ }
218
+ ],
219
+ "source": [
220
+ "#Test to make sure all three options work\n",
221
+ "text4 = \"This film was filmed in Iraq.\"\n",
222
+ "doc4 = nlp(text4)\n",
223
+ "lime4, limedf = select_crit(doc4,options=\"LIME\")"
224
+ ]
225
+ },
226
+ {
227
+ "cell_type": "code",
228
+ "execution_count": 9,
229
+ "id": "a0e55a24-65df-429e-a0cd-8daf91a5d242",
230
+ "metadata": {},
231
+ "outputs": [
232
+ {
233
+ "data": {
234
+ "text/html": [
235
+ "\n",
236
+ "<div id=\"altair-viz-23e37c16acf34cbead4ebdbe2bddfdb5\"></div>\n",
237
+ "<script type=\"text/javascript\">\n",
238
+ " var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
239
+ " (function(spec, embedOpt){\n",
240
+ " let outputDiv = document.currentScript.previousElementSibling;\n",
241
+ " if (outputDiv.id !== \"altair-viz-23e37c16acf34cbead4ebdbe2bddfdb5\") {\n",
242
+ " outputDiv = document.getElementById(\"altair-viz-23e37c16acf34cbead4ebdbe2bddfdb5\");\n",
243
+ " }\n",
244
+ " const paths = {\n",
245
+ " \"vega\": \"https://cdn.jsdelivr.net/npm//vega@5?noext\",\n",
246
+ " \"vega-lib\": \"https://cdn.jsdelivr.net/npm//vega-lib?noext\",\n",
247
+ " \"vega-lite\": \"https://cdn.jsdelivr.net/npm//vega-lite@4.17.0?noext\",\n",
248
+ " \"vega-embed\": \"https://cdn.jsdelivr.net/npm//vega-embed@6?noext\",\n",
249
+ " };\n",
250
+ "\n",
251
+ " function maybeLoadScript(lib, version) {\n",
252
+ " var key = `${lib.replace(\"-\", \"\")}_version`;\n",
253
+ " return (VEGA_DEBUG[key] == version) ?\n",
254
+ " Promise.resolve(paths[lib]) :\n",
255
+ " new Promise(function(resolve, reject) {\n",
256
+ " var s = document.createElement('script');\n",
257
+ " document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
258
+ " s.async = true;\n",
259
+ " s.onload = () => {\n",
260
+ " VEGA_DEBUG[key] = version;\n",
261
+ " return resolve(paths[lib]);\n",
262
+ " };\n",
263
+ " s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n",
264
+ " s.src = paths[lib];\n",
265
+ " });\n",
266
+ " }\n",
267
+ "\n",
268
+ " function showError(err) {\n",
269
+ " outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n",
270
+ " throw err;\n",
271
+ " }\n",
272
+ "\n",
273
+ " function displayChart(vegaEmbed) {\n",
274
+ " vegaEmbed(outputDiv, spec, embedOpt)\n",
275
+ " .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n",
276
+ " }\n",
277
+ "\n",
278
+ " if(typeof define === \"function\" && define.amd) {\n",
279
+ " requirejs.config({paths});\n",
280
+ " require([\"vega-embed\"], displayChart, err => showError(`Error loading script: ${err.message}`));\n",
281
+ " } else {\n",
282
+ " maybeLoadScript(\"vega\", \"5\")\n",
283
+ " .then(() => maybeLoadScript(\"vega-lite\", \"4.17.0\"))\n",
284
+ " .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n",
285
+ " .catch(showError)\n",
286
+ " .then(() => displayChart(vegaEmbed));\n",
287
+ " }\n",
288
+ " })({\"config\": {\"view\": {\"continuousWidth\": 400, \"continuousHeight\": 300, \"strokeWidth\": 0}, \"axis\": {\"grid\": false}}, \"layer\": [{\"mark\": \"bar\", \"encoding\": {\"color\": {\"field\": \"Weight\", \"legend\": null, \"scale\": {\"domain\": [0], \"range\": \"diverging\", \"scheme\": \"blueorange\", \"type\": \"threshold\"}, \"type\": \"quantitative\"}, \"tooltip\": [{\"field\": \"Word\", \"type\": \"nominal\"}, {\"field\": \"Weight\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"Weight\", \"scale\": {\"domain\": [-1, 1]}, \"type\": \"quantitative\"}, \"y\": {\"axis\": null, \"field\": \"Word\", \"sort\": \"x\", \"type\": \"nominal\"}}, \"title\": \"Importance of individual words\"}, {\"mark\": {\"type\": \"text\", \"align\": \"right\", \"baseline\": \"middle\", \"fill\": \"black\"}, \"encoding\": {\"color\": {\"field\": \"Weight\", \"legend\": null, \"scale\": {\"domain\": [0], \"range\": \"diverging\", \"scheme\": \"blueorange\", \"type\": \"threshold\"}, \"type\": \"quantitative\"}, \"text\": {\"field\": \"Word\", \"type\": \"nominal\"}, \"tooltip\": [{\"field\": \"Word\", \"type\": \"nominal\"}, {\"field\": \"Weight\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"Weight\", \"scale\": {\"domain\": [-1, 1]}, \"type\": \"quantitative\"}, \"y\": {\"axis\": null, \"field\": \"Word\", \"sort\": \"x\", \"type\": \"nominal\"}}, \"title\": \"Importance of individual words\"}], \"data\": {\"name\": \"data-1b001587c028498e70538ed310063e51\"}, \"width\": 300, \"$schema\": \"https://vega.github.io/schema/vega-lite/v4.17.0.json\", \"datasets\": {\"data-1b001587c028498e70538ed310063e51\": [{\"Word\": \"Iraq\", \"Weight\": -0.9358529031331603}, {\"Word\": \"was\", \"Weight\": -0.0358845002692577}, {\"Word\": \"in\", \"Weight\": -0.017416213388210394}, {\"Word\": \"filmed\", \"Weight\": 0.00802450706528586}, {\"Word\": \"film\", \"Weight\": 0.0077573875142285895}, {\"Word\": \"This\", \"Weight\": 0.0031263867499817305}]}}, {\"mode\": 
\"vega-lite\"});\n",
289
+ "</script>"
290
+ ],
291
+ "text/plain": [
292
+ "alt.LayerChart(...)"
293
+ ]
294
+ },
295
+ "execution_count": 9,
296
+ "metadata": {},
297
+ "output_type": "execute_result"
298
+ }
299
+ ],
300
+ "source": [
301
+ "single_nearest = alt.selection_single(on='mouseover', nearest=True)\n",
302
+ "viz = alt.Chart(limedf).encode(\n",
303
+ " alt.X('Weight:Q', scale=alt.Scale(domain=(-1, 1))),\n",
304
+ " alt.Y('Word:N', sort='x', axis=None),\n",
305
+ " color=alt.Color(\"Weight\", scale=alt.Scale(scheme='blueorange', domain=[0], type=\"threshold\", range='diverging'), legend=None),\n",
306
+ " tooltip = (\"Word\",\"Weight\")\n",
307
+ ").mark_bar().properties(title =\"Importance of individual words\")\n",
308
+ "\n",
309
+ "text = viz.mark_text(\n",
310
+ " fill=\"black\",\n",
311
+ " align='right',\n",
312
+ " baseline='middle'\n",
313
+ ").encode(\n",
314
+ " text='Word:N'\n",
315
+ ")\n",
316
+ "limeplot = alt.LayerChart(layer=[viz,text], width = 300).configure_axis(grid=False).configure_view(strokeWidth=0)\n",
317
+ "limeplot"
318
+ ]
319
+ },
320
+ {
321
+ "cell_type": "markdown",
322
+ "id": "bf0512b6-336e-4842-9bde-34e03a1ca7c6",
323
+ "metadata": {},
324
+ "source": [
325
+ "### Testing predictions and visualization\n",
326
+ "Here I will attempt to import the model from Hugging Face, generate predictions for each of the sentences, and then visualize those predictions in a dot plot. If I can get this to work then I will move on to testing a full pipeline for letting the user pick which part of the sentence they wish to generate counterfactuals for."
327
+ ]
328
+ },
329
+ {
330
+ "cell_type": "code",
331
+ "execution_count": 10,
332
+ "id": "74c639bb-e74a-4a46-8047-3552265ae6a4",
333
+ "metadata": {},
334
+ "outputs": [],
335
+ "source": [
336
+ "#Discovering that there's a pipeline specifically to provide scores. \n",
337
+ "#I used it to get a list of lists of dictionaries that I can then manipulate to calculate the proper prediction score.\n",
338
+ "pipe = TextClassificationPipeline(model=model, tokenizer=tokenizer, return_all_scores=True)"
339
+ ]
340
+ },
341
+ {
342
+ "cell_type": "code",
343
+ "execution_count": 11,
344
+ "id": "8726a284-99bd-47f1-9756-1c3ae603db10",
345
+ "metadata": {},
346
+ "outputs": [],
347
+ "source": [
348
+ "def eval_pred(text):\n",
349
+ " '''A basic function for evaluating the prediction from the model and turning it into a visualization friendly number.'''\n",
350
+ " preds = pipe(text)\n",
351
+ " neg_score = preds[0][0]['score']\n",
352
+ " pos_score = preds[0][1]['score']\n",
353
+ " if pos_score >= neg_score:\n",
354
+ " return pos_score\n",
355
+ " if neg_score >= pos_score:\n",
356
+ " return -1 * neg_score"
357
+ ]
358
+ },
359
+ {
360
+ "cell_type": "code",
361
+ "execution_count": 12,
362
+ "id": "f38f5061-f30a-4c81-9465-37951c3ad9f4",
363
+ "metadata": {},
364
+ "outputs": [],
365
+ "source": [
366
+ "def eval_pred_test(text, return_all = False):\n",
367
+ " '''A basic function for evaluating the prediction from the model and turning it into a visualization friendly number.'''\n",
368
+ " preds = pipe(text)\n",
369
+ " neg_score = -1 * preds[0][0]['score']\n",
370
+ " sent_neg = preds[0][0]['label']\n",
371
+ " pos_score = preds[0][1]['score']\n",
372
+ " sent_pos = preds[0][1]['label']\n",
373
+ " prediction = 0\n",
374
+ " sentiment = ''\n",
375
+ " if pos_score > abs(neg_score):\n",
376
+ " prediction = pos_score\n",
377
+ " sentiment = sent_pos\n",
378
+ " elif abs(neg_score) > pos_score:\n",
379
+ " prediction = neg_score\n",
380
+ " sentiment = sent_neg\n",
381
+ " \n",
382
+ " if return_all:\n",
383
+ " return prediction, sentiment\n",
384
+ " else:\n",
385
+ " return prediction"
386
+ ]
387
+ },
388
+ {
389
+ "cell_type": "markdown",
390
+ "id": "8b349a87-fe83-4045-a63a-d054489bb461",
391
+ "metadata": {},
392
+ "source": [
393
+ "## Load the dummy countries I created to test generating counterfactuals\n",
394
+ "I decided to test the pipeline with a known problem space. Taking the text from Aurélien Géron's observations on Twitter, I built a small-scale test, using what I had learned, to prove that we can identify a particular part of speech, use it to generate counterfactuals, and then build a visualization from it."
395
+ ]
396
+ },
397
+ {
398
+ "cell_type": "code",
399
+ "execution_count": 13,
400
+ "id": "46ab3332-964c-449f-8cef-a9ff7df397a4",
401
+ "metadata": {},
402
+ "outputs": [
403
+ {
404
+ "data": {
405
+ "text/html": [
406
+ "<div>\n",
407
+ "<style scoped>\n",
408
+ " .dataframe tbody tr th:only-of-type {\n",
409
+ " vertical-align: middle;\n",
410
+ " }\n",
411
+ "\n",
412
+ " .dataframe tbody tr th {\n",
413
+ " vertical-align: top;\n",
414
+ " }\n",
415
+ "\n",
416
+ " .dataframe thead th {\n",
417
+ " text-align: right;\n",
418
+ " }\n",
419
+ "</style>\n",
420
+ "<table border=\"1\" class=\"dataframe\">\n",
421
+ " <thead>\n",
422
+ " <tr style=\"text-align: right;\">\n",
423
+ " <th></th>\n",
424
+ " <th>Country</th>\n",
425
+ " <th>Continent</th>\n",
426
+ " </tr>\n",
427
+ " </thead>\n",
428
+ " <tbody>\n",
429
+ " <tr>\n",
430
+ " <th>0</th>\n",
431
+ " <td>Algeria</td>\n",
432
+ " <td>Africa</td>\n",
433
+ " </tr>\n",
434
+ " <tr>\n",
435
+ " <th>1</th>\n",
436
+ " <td>Angola</td>\n",
437
+ " <td>Africa</td>\n",
438
+ " </tr>\n",
439
+ " <tr>\n",
440
+ " <th>2</th>\n",
441
+ " <td>Benin</td>\n",
442
+ " <td>Africa</td>\n",
443
+ " </tr>\n",
444
+ " <tr>\n",
445
+ " <th>3</th>\n",
446
+ " <td>Botswana</td>\n",
447
+ " <td>Africa</td>\n",
448
+ " </tr>\n",
449
+ " <tr>\n",
450
+ " <th>4</th>\n",
451
+ " <td>Burkina</td>\n",
452
+ " <td>Africa</td>\n",
453
+ " </tr>\n",
454
+ " </tbody>\n",
455
+ "</table>\n",
456
+ "</div>"
457
+ ],
458
+ "text/plain": [
459
+ " Country Continent\n",
460
+ "0 Algeria Africa\n",
461
+ "1 Angola Africa\n",
462
+ "2 Benin Africa\n",
463
+ "3 Botswana Africa\n",
464
+ "4 Burkina Africa"
465
+ ]
466
+ },
467
+ "execution_count": 13,
468
+ "metadata": {},
469
+ "output_type": "execute_result"
470
+ }
471
+ ],
472
+ "source": [
473
+ "#load my test data from https://github.com/dbouquin/IS_608/blob/master/NanosatDB_munging/Countries-Continents.csv\n",
474
+ "df = pd.read_csv(\"Assets/Countries/countries.csv\")\n",
475
+ "df.head()"
476
+ ]
477
+ },
478
+ {
479
+ "cell_type": "code",
480
+ "execution_count": 14,
481
+ "id": "51c75894-80af-4625-8ce8-660e500b496b",
482
+ "metadata": {},
483
+ "outputs": [
484
+ {
485
+ "name": "stdout",
486
+ "output_type": "stream",
487
+ "text": [
488
+ "From This film, ['film'] added to pos_options due to wildcard.\n",
489
+ "From Iraq, ['Iraq'] and ['film', 'Iraq'] added to pos_options due to entity recognition.\n",
490
+ "['film', 'Iraq']\n"
491
+ ]
492
+ },
493
+ {
494
+ "data": {
495
+ "text/plain": [
496
+ "'Iraq'"
497
+ ]
498
+ },
499
+ "execution_count": 14,
500
+ "metadata": {},
501
+ "output_type": "execute_result"
502
+ }
503
+ ],
504
+ "source": [
505
+ "#Note: we will need to build the function that lets the user choose from the options available. For now I have hard coded it as \"selection\", from \"user_options\".\n",
506
+ "user_options = select_crit(doc4)\n",
507
+ "print(user_options)\n",
508
+ "selection = user_options[1]\n",
509
+ "selection"
510
+ ]
511
+ },
512
+ {
513
+ "cell_type": "code",
514
+ "execution_count": 15,
515
+ "id": "3d6419f1-bf7d-44bc-afb8-ac26ef9002df",
516
+ "metadata": {},
517
+ "outputs": [
518
+ {
519
+ "data": {
520
+ "text/html": [
521
+ "<div>\n",
522
+ "<style scoped>\n",
523
+ " .dataframe tbody tr th:only-of-type {\n",
524
+ " vertical-align: middle;\n",
525
+ " }\n",
526
+ "\n",
527
+ " .dataframe tbody tr th {\n",
528
+ " vertical-align: top;\n",
529
+ " }\n",
530
+ "\n",
531
+ " .dataframe thead th {\n",
532
+ " text-align: right;\n",
533
+ " }\n",
534
+ "</style>\n",
535
+ "<table border=\"1\" class=\"dataframe\">\n",
536
+ " <thead>\n",
537
+ " <tr style=\"text-align: right;\">\n",
538
+ " <th></th>\n",
539
+ " <th>Country</th>\n",
540
+ " <th>Continent</th>\n",
541
+ " <th>text</th>\n",
542
+ " <th>prediction</th>\n",
543
+ " <th>seed</th>\n",
544
+ " </tr>\n",
545
+ " </thead>\n",
546
+ " <tbody>\n",
547
+ " <tr>\n",
548
+ " <th>0</th>\n",
549
+ " <td>Algeria</td>\n",
550
+ " <td>Africa</td>\n",
551
+ " <td>This film was filmed in Algeria.</td>\n",
552
+ " <td>0.806454</td>\n",
553
+ " <td>alternative</td>\n",
554
+ " </tr>\n",
555
+ " <tr>\n",
556
+ " <th>1</th>\n",
557
+ " <td>Angola</td>\n",
558
+ " <td>Africa</td>\n",
559
+ " <td>This film was filmed in Angola.</td>\n",
560
+ " <td>-0.775854</td>\n",
561
+ " <td>alternative</td>\n",
562
+ " </tr>\n",
563
+ " <tr>\n",
564
+ " <th>2</th>\n",
565
+ " <td>Benin</td>\n",
566
+ " <td>Africa</td>\n",
567
+ " <td>This film was filmed in Benin.</td>\n",
568
+ " <td>0.962272</td>\n",
569
+ " <td>alternative</td>\n",
570
+ " </tr>\n",
571
+ " <tr>\n",
572
+ " <th>3</th>\n",
573
+ " <td>Botswana</td>\n",
574
+ " <td>Africa</td>\n",
575
+ " <td>This film was filmed in Botswana.</td>\n",
576
+ " <td>0.785837</td>\n",
577
+ " <td>alternative</td>\n",
578
+ " </tr>\n",
579
+ " <tr>\n",
580
+ " <th>4</th>\n",
581
+ " <td>Burkina</td>\n",
582
+ " <td>Africa</td>\n",
583
+ " <td>This film was filmed in Burkina.</td>\n",
584
+ " <td>0.872980</td>\n",
585
+ " <td>alternative</td>\n",
586
+ " </tr>\n",
587
+ " </tbody>\n",
588
+ "</table>\n",
589
+ "</div>"
590
+ ],
591
+ "text/plain": [
592
+ " Country Continent text prediction \\\n",
593
+ "0 Algeria Africa This film was filmed in Algeria. 0.806454 \n",
594
+ "1 Angola Africa This film was filmed in Angola. -0.775854 \n",
595
+ "2 Benin Africa This film was filmed in Benin. 0.962272 \n",
596
+ "3 Botswana Africa This film was filmed in Botswana. 0.785837 \n",
597
+ "4 Burkina Africa This film was filmed in Burkina. 0.872980 \n",
598
+ "\n",
599
+ " seed \n",
600
+ "0 alternative \n",
601
+ "1 alternative \n",
602
+ "2 alternative \n",
603
+ "3 alternative \n",
604
+ "4 alternative "
605
+ ]
606
+ },
607
+ "execution_count": 15,
608
+ "metadata": {},
609
+ "output_type": "execute_result"
610
+ }
611
+ ],
612
+ "source": [
613
+ "#Create a function that generates the counterfactuals within a data frame.\n",
614
+ "def gen_cf_country(df,document,selection):\n",
615
+ " df['text'] = df.Country.apply(lambda x: document.text.replace(selection,x))\n",
616
+ " df['prediction'] = df.text.apply(eval_pred_test)\n",
617
+ " #Label each row as 'seed' (its Country equals the selection) or 'alternative'. Added because I think the end results will be better if the seed can be identified within the data we generate counterfactuals from.\n",
618
+ " df['seed'] = df.Country.apply(lambda x: 'seed' if x == selection else 'alternative')\n",
619
+ " return df\n",
620
+ "\n",
621
+ "df = gen_cf_country(df,doc4,selection)\n",
622
+ "df.head()"
623
+ ]
624
+ },
625
+ {
626
+ "cell_type": "code",
627
+ "execution_count": 16,
628
+ "id": "ecb9dd41-2fab-49bd-bae5-30300ce39e41",
629
+ "metadata": {},
630
+ "outputs": [
631
+ {
632
+ "data": {
633
+ "text/html": [
634
+ "\n",
635
+ "<div id=\"altair-viz-b04081e2f48148ebbc743fff61e76f2f\"></div>\n",
636
+ "<script type=\"text/javascript\">\n",
637
+ " var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
638
+ " (function(spec, embedOpt){\n",
639
+ " let outputDiv = document.currentScript.previousElementSibling;\n",
640
+ " if (outputDiv.id !== \"altair-viz-b04081e2f48148ebbc743fff61e76f2f\") {\n",
641
+ " outputDiv = document.getElementById(\"altair-viz-b04081e2f48148ebbc743fff61e76f2f\");\n",
642
+ " }\n",
643
+ " const paths = {\n",
644
+ " \"vega\": \"https://cdn.jsdelivr.net/npm//vega@5?noext\",\n",
645
+ " \"vega-lib\": \"https://cdn.jsdelivr.net/npm//vega-lib?noext\",\n",
646
+ " \"vega-lite\": \"https://cdn.jsdelivr.net/npm//vega-lite@4.17.0?noext\",\n",
647
+ " \"vega-embed\": \"https://cdn.jsdelivr.net/npm//vega-embed@6?noext\",\n",
648
+ " };\n",
649
+ "\n",
650
+ " function maybeLoadScript(lib, version) {\n",
651
+ " var key = `${lib.replace(\"-\", \"\")}_version`;\n",
652
+ " return (VEGA_DEBUG[key] == version) ?\n",
653
+ " Promise.resolve(paths[lib]) :\n",
654
+ " new Promise(function(resolve, reject) {\n",
655
+ " var s = document.createElement('script');\n",
656
+ " document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
657
+ " s.async = true;\n",
658
+ " s.onload = () => {\n",
659
+ " VEGA_DEBUG[key] = version;\n",
660
+ " return resolve(paths[lib]);\n",
661
+ " };\n",
662
+ " s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n",
663
+ " s.src = paths[lib];\n",
664
+ " });\n",
665
+ " }\n",
666
+ "\n",
667
+ " function showError(err) {\n",
668
+ " outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n",
669
+ " throw err;\n",
670
+ " }\n",
671
+ "\n",
672
+ " function displayChart(vegaEmbed) {\n",
673
+ " vegaEmbed(outputDiv, spec, embedOpt)\n",
674
+ " .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n",
675
+ " }\n",
676
+ "\n",
677
+ " if(typeof define === \"function\" && define.amd) {\n",
678
+ " requirejs.config({paths});\n",
679
+ " require([\"vega-embed\"], displayChart, err => showError(`Error loading script: ${err.message}`));\n",
680
+ " } else {\n",
681
+ " maybeLoadScript(\"vega\", \"5\")\n",
682
+ " .then(() => maybeLoadScript(\"vega-lite\", \"4.17.0\"))\n",
683
+ " .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n",
684
+ " .catch(showError)\n",
685
+ " .then(() => displayChart(vegaEmbed));\n",
686
+ " }\n",
687
+ " })({\"config\": {\"view\": {\"continuousWidth\": 400, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-d6144c20ed1c104065f226d393d7e424\"}, \"mark\": {\"type\": \"circle\", \"opacity\": 0.5}, \"encoding\": {\"color\": {\"field\": \"seed\", \"legend\": {\"title\": \"Seed or Alternative\"}, \"type\": \"nominal\"}, \"size\": {\"field\": \"seed\", \"type\": \"nominal\"}, \"tooltip\": [{\"field\": \"Country\", \"type\": \"nominal\"}, {\"field\": \"prediction\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"Continent\", \"type\": \"nominal\"}, \"y\": {\"field\": \"prediction\", \"type\": \"quantitative\"}}, \"selection\": {\"selector002\": {\"type\": \"single\", \"on\": \"mouseover\", \"nearest\": true}}, \"width\": 300, \"$schema\": \"https://vega.github.io/schema/vega-lite/v4.17.0.json\", \"datasets\": {\"data-d6144c20ed1c104065f226d393d7e424\": [{\"Country\": \"Algeria\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Algeria.\", \"prediction\": 0.8064541816711426, \"seed\": \"alternative\"}, {\"Country\": \"Angola\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Angola.\", \"prediction\": -0.7758541703224182, \"seed\": \"alternative\"}, {\"Country\": \"Benin\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Benin.\", \"prediction\": 0.9622722268104553, \"seed\": \"alternative\"}, {\"Country\": \"Botswana\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Botswana.\", \"prediction\": 0.7858365774154663, \"seed\": \"alternative\"}, {\"Country\": \"Burkina\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Burkina.\", \"prediction\": 0.8729804754257202, \"seed\": \"alternative\"}, {\"Country\": \"Burundi\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Burundi.\", \"prediction\": -0.6306232810020447, \"seed\": \"alternative\"}, {\"Country\": \"Cameroon\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Cameroon.\", \"prediction\": 
0.5283073782920837, \"seed\": \"alternative\"}, {\"Country\": \"Cape Verde\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Cape Verde.\", \"prediction\": 0.8932027220726013, \"seed\": \"alternative\"}, {\"Country\": \"Central African Republic\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Central African Republic.\", \"prediction\": 0.9326885342597961, \"seed\": \"alternative\"}, {\"Country\": \"Chad\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Chad.\", \"prediction\": 0.788737952709198, \"seed\": \"alternative\"}, {\"Country\": \"Comoros\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Comoros.\", \"prediction\": 0.9623100757598877, \"seed\": \"alternative\"}, {\"Country\": \"Congo\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Congo.\", \"prediction\": 0.6309685707092285, \"seed\": \"alternative\"}, {\"Country\": \"Congo, Democratic Republic of\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Congo, Democratic Republic of.\", \"prediction\": -0.54060298204422, \"seed\": \"alternative\"}, {\"Country\": \"Djibouti\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Djibouti.\", \"prediction\": 0.8894529938697815, \"seed\": \"alternative\"}, {\"Country\": \"Egypt\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Egypt.\", \"prediction\": 0.9648140072822571, \"seed\": \"alternative\"}, {\"Country\": \"Equatorial Guinea\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Equatorial Guinea.\", \"prediction\": 0.6021467447280884, \"seed\": \"alternative\"}, {\"Country\": \"Eritrea\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Eritrea.\", \"prediction\": 0.5404142141342163, \"seed\": \"alternative\"}, {\"Country\": \"Ethiopia\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Ethiopia.\", \"prediction\": 0.7997546195983887, \"seed\": \"alternative\"}, {\"Country\": \"Gabon\", 
\"Continent\": \"Africa\", \"text\": \"This film was filmed in Gabon.\", \"prediction\": -0.8517823219299316, \"seed\": \"alternative\"}, {\"Country\": \"Gambia\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Gambia.\", \"prediction\": -0.5401656031608582, \"seed\": \"alternative\"}, {\"Country\": \"Ghana\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Ghana.\", \"prediction\": 0.9684805870056152, \"seed\": \"alternative\"}, {\"Country\": \"Guinea\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Guinea.\", \"prediction\": 0.6188081502914429, \"seed\": \"alternative\"}, {\"Country\": \"Guinea-Bissau\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Guinea-Bissau.\", \"prediction\": -0.500963032245636, \"seed\": \"alternative\"}, {\"Country\": \"Ivory Coast\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Ivory Coast.\", \"prediction\": 0.9872506856918335, \"seed\": \"alternative\"}, {\"Country\": \"Kenya\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Kenya.\", \"prediction\": 0.9789031744003296, \"seed\": \"alternative\"}, {\"Country\": \"Lesotho\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Lesotho.\", \"prediction\": 0.6674107313156128, \"seed\": \"alternative\"}, {\"Country\": \"Liberia\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Liberia.\", \"prediction\": -0.6720185279846191, \"seed\": \"alternative\"}, {\"Country\": \"Libya\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Libya.\", \"prediction\": 0.53217613697052, \"seed\": \"alternative\"}, {\"Country\": \"Madagascar\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Madagascar.\", \"prediction\": 0.9730344414710999, \"seed\": \"alternative\"}, {\"Country\": \"Malawi\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Malawi.\", \"prediction\": -0.7816339135169983, \"seed\": \"alternative\"}, {\"Country\": \"Mali\", 
\"Continent\": \"Africa\", \"text\": \"This film was filmed in Mali.\", \"prediction\": -0.6651991009712219, \"seed\": \"alternative\"}, {\"Country\": \"Mauritania\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Mauritania.\", \"prediction\": 0.6149344444274902, \"seed\": \"alternative\"}, {\"Country\": \"Mauritius\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Mauritius.\", \"prediction\": 0.9310740828514099, \"seed\": \"alternative\"}, {\"Country\": \"Morocco\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Morocco.\", \"prediction\": 0.9121577143669128, \"seed\": \"alternative\"}, {\"Country\": \"Mozambique\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Mozambique.\", \"prediction\": -0.7047757506370544, \"seed\": \"alternative\"}, {\"Country\": \"Namibia\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Namibia.\", \"prediction\": -0.5836523175239563, \"seed\": \"alternative\"}, {\"Country\": \"Niger\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Niger.\", \"prediction\": -0.6313472390174866, \"seed\": \"alternative\"}, {\"Country\": \"Nigeria\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Nigeria.\", \"prediction\": 0.7361583113670349, \"seed\": \"alternative\"}, {\"Country\": \"Rwanda\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Rwanda.\", \"prediction\": -0.7642565965652466, \"seed\": \"alternative\"}, {\"Country\": \"Sao Tome and Principe\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Sao Tome and Principe.\", \"prediction\": 0.6587044596672058, \"seed\": \"alternative\"}, {\"Country\": \"Senegal\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Senegal.\", \"prediction\": 0.8155898451805115, \"seed\": \"alternative\"}, {\"Country\": \"Seychelles\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Seychelles.\", \"prediction\": 0.8802894949913025, \"seed\": 
\"alternative\"}, {\"Country\": \"Sierra Leone\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Sierra Leone.\", \"prediction\": 0.9483919143676758, \"seed\": \"alternative\"}, {\"Country\": \"Somalia\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Somalia.\", \"prediction\": -0.6477505564689636, \"seed\": \"alternative\"}, {\"Country\": \"South Africa\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in South Africa.\", \"prediction\": 0.5048943161964417, \"seed\": \"alternative\"}, {\"Country\": \"South Sudan\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in South Sudan.\", \"prediction\": -0.8506219983100891, \"seed\": \"alternative\"}, {\"Country\": \"Sudan\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Sudan.\", \"prediction\": -0.8910807967185974, \"seed\": \"alternative\"}, {\"Country\": \"Swaziland\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Swaziland.\", \"prediction\": 0.7761040925979614, \"seed\": \"alternative\"}, {\"Country\": \"Tanzania\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Tanzania.\", \"prediction\": 0.669053316116333, \"seed\": \"alternative\"}, {\"Country\": \"Togo\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Togo.\", \"prediction\": 0.9404287934303284, \"seed\": \"alternative\"}, {\"Country\": \"Tunisia\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Tunisia.\", \"prediction\": 0.8345948457717896, \"seed\": \"alternative\"}, {\"Country\": \"Uganda\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Uganda.\", \"prediction\": 0.7823328971862793, \"seed\": \"alternative\"}, {\"Country\": \"Zambia\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Zambia.\", \"prediction\": -0.6479448080062866, \"seed\": \"alternative\"}, {\"Country\": \"Zimbabwe\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Zimbabwe.\", \"prediction\": 
0.7163158059120178, \"seed\": \"alternative\"}, {\"Country\": \"Afghanistan\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Afghanistan.\", \"prediction\": -0.8350331783294678, \"seed\": \"alternative\"}, {\"Country\": \"Bahrain\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Bahrain.\", \"prediction\": 0.9627965092658997, \"seed\": \"alternative\"}, {\"Country\": \"Bangladesh\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Bangladesh.\", \"prediction\": 0.6659616231918335, \"seed\": \"alternative\"}, {\"Country\": \"Bhutan\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Bhutan.\", \"prediction\": 0.9108285307884216, \"seed\": \"alternative\"}, {\"Country\": \"Brunei\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Brunei.\", \"prediction\": 0.7673805952072144, \"seed\": \"alternative\"}, {\"Country\": \"Burma (Myanmar)\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Burma (Myanmar).\", \"prediction\": 0.5261574387550354, \"seed\": \"alternative\"}, {\"Country\": \"Cambodia\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Cambodia.\", \"prediction\": 0.9706045389175415, \"seed\": \"alternative\"}, {\"Country\": \"China\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in China.\", \"prediction\": 0.6985915303230286, \"seed\": \"alternative\"}, {\"Country\": \"East Timor\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in East Timor.\", \"prediction\": -0.7553014159202576, \"seed\": \"alternative\"}, {\"Country\": \"India\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in India.\", \"prediction\": 0.9856906533241272, \"seed\": \"alternative\"}, {\"Country\": \"Indonesia\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Indonesia.\", \"prediction\": 0.9617947936058044, \"seed\": \"alternative\"}, {\"Country\": \"Iran\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Iran.\", \"prediction\": 
0.935718834400177, \"seed\": \"alternative\"}, {\"Country\": \"Iraq\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Iraq.\", \"prediction\": -0.9768388867378235, \"seed\": \"seed\"}, {\"Country\": \"Israel\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Israel.\", \"prediction\": 0.8940765261650085, \"seed\": \"alternative\"}, {\"Country\": \"Japan\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Japan.\", \"prediction\": 0.8561221957206726, \"seed\": \"alternative\"}, {\"Country\": \"Jordan\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Jordan.\", \"prediction\": 0.5632433891296387, \"seed\": \"alternative\"}, {\"Country\": \"Kazakhstan\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Kazakhstan.\", \"prediction\": 0.8813521862030029, \"seed\": \"alternative\"}, {\"Country\": \"Korea, North\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Korea, North.\", \"prediction\": -0.692742645740509, \"seed\": \"alternative\"}, {\"Country\": \"Korea, South\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Korea, South.\", \"prediction\": 0.7591306567192078, \"seed\": \"alternative\"}, {\"Country\": \"Kuwait\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Kuwait.\", \"prediction\": 0.9136238098144531, \"seed\": \"alternative\"}, {\"Country\": \"Kyrgyzstan\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Kyrgyzstan.\", \"prediction\": 0.9416173100471497, \"seed\": \"alternative\"}, {\"Country\": \"Laos\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Laos.\", \"prediction\": 0.7455804347991943, \"seed\": \"alternative\"}, {\"Country\": \"Lebanon\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Lebanon.\", \"prediction\": 0.9018603563308716, \"seed\": \"alternative\"}, {\"Country\": \"Malaysia\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Malaysia.\", \"prediction\": 0.9053533673286438, 
\"seed\": \"alternative\"}, {\"Country\": \"Maldives\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Maldives.\", \"prediction\": 0.8150556087493896, \"seed\": \"alternative\"}, {\"Country\": \"Mongolia\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Mongolia.\", \"prediction\": 0.9706059098243713, \"seed\": \"alternative\"}, {\"Country\": \"Nepal\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Nepal.\", \"prediction\": 0.9837730526924133, \"seed\": \"alternative\"}, {\"Country\": \"Oman\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Oman.\", \"prediction\": 0.8641175627708435, \"seed\": \"alternative\"}, {\"Country\": \"Pakistan\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Pakistan.\", \"prediction\": 0.8881147503852844, \"seed\": \"alternative\"}, {\"Country\": \"Philippines\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Philippines.\", \"prediction\": 0.9892238974571228, \"seed\": \"alternative\"}, {\"Country\": \"Qatar\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Qatar.\", \"prediction\": 0.9696690440177917, \"seed\": \"alternative\"}, {\"Country\": \"Russian Federation\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Russian Federation.\", \"prediction\": 0.9777944087982178, \"seed\": \"alternative\"}, {\"Country\": \"Saudi Arabia\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Saudi Arabia.\", \"prediction\": -0.7760475873947144, \"seed\": \"alternative\"}, {\"Country\": \"Singapore\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Singapore.\", \"prediction\": 0.9684174060821533, \"seed\": \"alternative\"}, {\"Country\": \"Sri Lanka\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Sri Lanka.\", \"prediction\": 0.9552921056747437, \"seed\": \"alternative\"}, {\"Country\": \"Syria\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Syria.\", \"prediction\": 
-0.8887014985084534, \"seed\": \"alternative\"}, {\"Country\": \"Tajikistan\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Tajikistan.\", \"prediction\": 0.8012317419052124, \"seed\": \"alternative\"}, {\"Country\": \"Thailand\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Thailand.\", \"prediction\": 0.8334980607032776, \"seed\": \"alternative\"}, {\"Country\": \"Turkey\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Turkey.\", \"prediction\": 0.5693907141685486, \"seed\": \"alternative\"}, {\"Country\": \"Turkmenistan\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Turkmenistan.\", \"prediction\": 0.8194981813430786, \"seed\": \"alternative\"}, {\"Country\": \"United Arab Emirates\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in United Arab Emirates.\", \"prediction\": 0.921615719795227, \"seed\": \"alternative\"}, {\"Country\": \"Uzbekistan\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Uzbekistan.\", \"prediction\": 0.8483680486679077, \"seed\": \"alternative\"}, {\"Country\": \"Vietnam\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Vietnam.\", \"prediction\": -0.9427406191825867, \"seed\": \"alternative\"}, {\"Country\": \"Yemen\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Yemen.\", \"prediction\": -0.8567103743553162, \"seed\": \"alternative\"}, {\"Country\": \"Albania\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Albania.\", \"prediction\": 0.9874222278594971, \"seed\": \"alternative\"}, {\"Country\": \"Andorra\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Andorra.\", \"prediction\": 0.9597309231758118, \"seed\": \"alternative\"}, {\"Country\": \"Armenia\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Armenia.\", \"prediction\": 0.986950695514679, \"seed\": \"alternative\"}, {\"Country\": \"Austria\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in 
Austria.\", \"prediction\": 0.8858200907707214, \"seed\": \"alternative\"}, {\"Country\": \"Azerbaijan\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Azerbaijan.\", \"prediction\": 0.9770861268043518, \"seed\": \"alternative\"}, {\"Country\": \"Belarus\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Belarus.\", \"prediction\": 0.5220555663108826, \"seed\": \"alternative\"}, {\"Country\": \"Belgium\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Belgium.\", \"prediction\": 0.9663146138191223, \"seed\": \"alternative\"}, {\"Country\": \"Bosnia and Herzegovina\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Bosnia and Herzegovina.\", \"prediction\": 0.9699962139129639, \"seed\": \"alternative\"}, {\"Country\": \"Bulgaria\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Bulgaria.\", \"prediction\": 0.8968954086303711, \"seed\": \"alternative\"}, {\"Country\": \"Croatia\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Croatia.\", \"prediction\": 0.8545156717300415, \"seed\": \"alternative\"}, {\"Country\": \"Cyprus\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Cyprus.\", \"prediction\": 0.9457007646560669, \"seed\": \"alternative\"}, {\"Country\": \"CZ\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in CZ.\", \"prediction\": -0.9620359539985657, \"seed\": \"alternative\"}, {\"Country\": \"Denmark\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Denmark.\", \"prediction\": 0.9433714747428894, \"seed\": \"alternative\"}, {\"Country\": \"Estonia\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Estonia.\", \"prediction\": 0.9754448533058167, \"seed\": \"alternative\"}, {\"Country\": \"Finland\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Finland.\", \"prediction\": 0.9832987189292908, \"seed\": \"alternative\"}, {\"Country\": \"France\", \"Continent\": \"Europe\", \"text\": \"This 
film was filmed in France.\", \"prediction\": 0.9652075171470642, \"seed\": \"alternative\"}, {\"Country\": \"Georgia\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Georgia.\", \"prediction\": 0.9579687714576721, \"seed\": \"alternative\"}, {\"Country\": \"Germany\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Germany.\", \"prediction\": -0.7719752192497253, \"seed\": \"alternative\"}, {\"Country\": \"Greece\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Greece.\", \"prediction\": 0.974821925163269, \"seed\": \"alternative\"}, {\"Country\": \"Hungary\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Hungary.\", \"prediction\": 0.9794204831123352, \"seed\": \"alternative\"}, {\"Country\": \"Iceland\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Iceland.\", \"prediction\": 0.9596456289291382, \"seed\": \"alternative\"}, {\"Country\": \"Ireland\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Ireland.\", \"prediction\": 0.9691770076751709, \"seed\": \"alternative\"}, {\"Country\": \"Italy\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Italy.\", \"prediction\": 0.973678469657898, \"seed\": \"alternative\"}, {\"Country\": \"Latvia\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Latvia.\", \"prediction\": 0.9340384006500244, \"seed\": \"alternative\"}, {\"Country\": \"Liechtenstein\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Liechtenstein.\", \"prediction\": 0.9714267253875732, \"seed\": \"alternative\"}, {\"Country\": \"Lithuania\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Lithuania.\", \"prediction\": 0.9562608599662781, \"seed\": \"alternative\"}, {\"Country\": \"Luxembourg\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Luxembourg.\", \"prediction\": 0.9322720170021057, \"seed\": \"alternative\"}, {\"Country\": \"Macedonia\", \"Continent\": \"Europe\", \"text\": 
\"This film was filmed in Macedonia.\", \"prediction\": 0.8895869255065918, \"seed\": \"alternative\"}, {\"Country\": \"Malta\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Malta.\", \"prediction\": 0.979903519153595, \"seed\": \"alternative\"}, {\"Country\": \"Moldova\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Moldova.\", \"prediction\": 0.8919235467910767, \"seed\": \"alternative\"}, {\"Country\": \"Monaco\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Monaco.\", \"prediction\": 0.9971835017204285, \"seed\": \"alternative\"}, {\"Country\": \"Montenegro\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Montenegro.\", \"prediction\": 0.9382426738739014, \"seed\": \"alternative\"}, {\"Country\": \"Netherlands\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Netherlands.\", \"prediction\": 0.9562605023384094, \"seed\": \"alternative\"}, {\"Country\": \"Norway\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Norway.\", \"prediction\": 0.9528943300247192, \"seed\": \"alternative\"}, {\"Country\": \"Poland\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Poland.\", \"prediction\": 0.9124379754066467, \"seed\": \"alternative\"}, {\"Country\": \"Portugal\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Portugal.\", \"prediction\": 0.9363807439804077, \"seed\": \"alternative\"}, {\"Country\": \"Romania\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Romania.\", \"prediction\": 0.982775866985321, \"seed\": \"alternative\"}, {\"Country\": \"San Marino\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in San Marino.\", \"prediction\": 0.924018144607544, \"seed\": \"alternative\"}, {\"Country\": \"Serbia\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Serbia.\", \"prediction\": 0.740748405456543, \"seed\": \"alternative\"}, {\"Country\": \"Slovakia\", \"Continent\": \"Europe\", 
\"text\": \"This film was filmed in Slovakia.\", \"prediction\": 0.5953425168991089, \"seed\": \"alternative\"}, {\"Country\": \"Slovenia\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Slovenia.\", \"prediction\": 0.8840153217315674, \"seed\": \"alternative\"}, {\"Country\": \"Spain\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Spain.\", \"prediction\": 0.9535741209983826, \"seed\": \"alternative\"}, {\"Country\": \"Sweden\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Sweden.\", \"prediction\": 0.9694980382919312, \"seed\": \"alternative\"}, {\"Country\": \"Switzerland\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Switzerland.\", \"prediction\": 0.7584144473075867, \"seed\": \"alternative\"}, {\"Country\": \"Ukraine\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Ukraine.\", \"prediction\": 0.7340573668479919, \"seed\": \"alternative\"}, {\"Country\": \"United Kingdom\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in United Kingdom.\", \"prediction\": 0.8982904553413391, \"seed\": \"alternative\"}, {\"Country\": \"Vatican City\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Vatican City.\", \"prediction\": 0.7796335816383362, \"seed\": \"alternative\"}, {\"Country\": \"Antigua and Barbuda\", \"Continent\": \"North America\", \"text\": \"This film was filmed in Antigua and Barbuda.\", \"prediction\": 0.9056354761123657, \"seed\": \"alternative\"}, {\"Country\": \"Bahamas\", \"Continent\": \"North America\", \"text\": \"This film was filmed in Bahamas.\", \"prediction\": 0.9206929802894592, \"seed\": \"alternative\"}, {\"Country\": \"Barbados\", \"Continent\": \"North America\", \"text\": \"This film was filmed in Barbados.\", \"prediction\": 0.9170283079147339, \"seed\": \"alternative\"}, {\"Country\": \"Belize\", \"Continent\": \"North America\", \"text\": \"This film was filmed in Belize.\", \"prediction\": 0.9203323125839233, 
\"seed\": \"alternative\"}, {\"Country\": \"Canada\", \"Continent\": \"North America\", \"text\": \"This film was filmed in Canada.\", \"prediction\": 0.9400970339775085, \"seed\": \"alternative\"}, {\"Country\": \"Costa Rica\", \"Continent\": \"North America\", \"text\": \"This film was filmed in Costa Rica.\", \"prediction\": 0.9815211892127991, \"seed\": \"alternative\"}, {\"Country\": \"Cuba\", \"Continent\": \"North America\", \"text\": \"This film was filmed in Cuba.\", \"prediction\": 0.7347409725189209, \"seed\": \"alternative\"}, {\"Country\": \"Dominica\", \"Continent\": \"North America\", \"text\": \"This film was filmed in Dominica.\", \"prediction\": 0.5335615277290344, \"seed\": \"alternative\"}, {\"Country\": \"Dominican Republic\", \"Continent\": \"North America\", \"text\": \"This film was filmed in Dominican Republic.\", \"prediction\": 0.9594704508781433, \"seed\": \"alternative\"}, {\"Country\": \"El Salvador\", \"Continent\": \"North America\", \"text\": \"This film was filmed in El Salvador.\", \"prediction\": 0.9804539084434509, \"seed\": \"alternative\"}, {\"Country\": \"Grenada\", \"Continent\": \"North America\", \"text\": \"This film was filmed in Grenada.\", \"prediction\": 0.6266372799873352, \"seed\": \"alternative\"}, {\"Country\": \"Guatemala\", \"Continent\": \"North America\", \"text\": \"This film was filmed in Guatemala.\", \"prediction\": 0.7368012070655823, \"seed\": \"alternative\"}, {\"Country\": \"Haiti\", \"Continent\": \"North America\", \"text\": \"This film was filmed in Haiti.\", \"prediction\": 0.9208669662475586, \"seed\": \"alternative\"}, {\"Country\": \"Honduras\", \"Continent\": \"North America\", \"text\": \"This film was filmed in Honduras.\", \"prediction\": 0.7440645098686218, \"seed\": \"alternative\"}, {\"Country\": \"Jamaica\", \"Continent\": \"North America\", \"text\": \"This film was filmed in Jamaica.\", \"prediction\": 0.8702073097229004, \"seed\": \"alternative\"}, {\"Country\": \"Mexico\", 
\"Continent\": \"North America\", \"text\": \"This film was filmed in Mexico.\", \"prediction\": 0.9770798683166504, \"seed\": \"alternative\"}, {\"Country\": \"Nicaragua\", \"Continent\": \"North America\", \"text\": \"This film was filmed in Nicaragua.\", \"prediction\": -0.6681438684463501, \"seed\": \"alternative\"}, {\"Country\": \"Panama\", \"Continent\": \"North America\", \"text\": \"This film was filmed in Panama.\", \"prediction\": 0.737115740776062, \"seed\": \"alternative\"}, {\"Country\": \"Saint Kitts and Nevis\", \"Continent\": \"North America\", \"text\": \"This film was filmed in Saint Kitts and Nevis.\", \"prediction\": 0.9829047918319702, \"seed\": \"alternative\"}, {\"Country\": \"Saint Lucia\", \"Continent\": \"North America\", \"text\": \"This film was filmed in Saint Lucia.\", \"prediction\": 0.7933508157730103, \"seed\": \"alternative\"}, {\"Country\": \"Saint Vincent and the Grenadines\", \"Continent\": \"North America\", \"text\": \"This film was filmed in Saint Vincent and the Grenadines.\", \"prediction\": 0.8782792091369629, \"seed\": \"alternative\"}, {\"Country\": \"Trinidad and Tobago\", \"Continent\": \"North America\", \"text\": \"This film was filmed in Trinidad and Tobago.\", \"prediction\": 0.9884806871414185, \"seed\": \"alternative\"}, {\"Country\": \"US\", \"Continent\": \"North America\", \"text\": \"This film was filmed in US.\", \"prediction\": 0.926520586013794, \"seed\": \"alternative\"}, {\"Country\": \"Australia\", \"Continent\": \"Oceania\", \"text\": \"This film was filmed in Australia.\", \"prediction\": 0.9371141195297241, \"seed\": \"alternative\"}, {\"Country\": \"Fiji\", \"Continent\": \"Oceania\", \"text\": \"This film was filmed in Fiji.\", \"prediction\": 0.9061108827590942, \"seed\": \"alternative\"}, {\"Country\": \"Kiribati\", \"Continent\": \"Oceania\", \"text\": \"This film was filmed in Kiribati.\", \"prediction\": 0.9559115767478943, \"seed\": \"alternative\"}, {\"Country\": \"Marshall Islands\", 
\"Continent\": \"Oceania\", \"text\": \"This film was filmed in Marshall Islands.\", \"prediction\": 0.96001136302948, \"seed\": \"alternative\"}, {\"Country\": \"Micronesia\", \"Continent\": \"Oceania\", \"text\": \"This film was filmed in Micronesia.\", \"prediction\": -0.57024085521698, \"seed\": \"alternative\"}, {\"Country\": \"Nauru\", \"Continent\": \"Oceania\", \"text\": \"This film was filmed in Nauru.\", \"prediction\": 0.9323841333389282, \"seed\": \"alternative\"}, {\"Country\": \"New Zealand\", \"Continent\": \"Oceania\", \"text\": \"This film was filmed in New Zealand.\", \"prediction\": 0.9654895663261414, \"seed\": \"alternative\"}, {\"Country\": \"Palau\", \"Continent\": \"Oceania\", \"text\": \"This film was filmed in Palau.\", \"prediction\": 0.7104437351226807, \"seed\": \"alternative\"}, {\"Country\": \"Papua New Guinea\", \"Continent\": \"Oceania\", \"text\": \"This film was filmed in Papua New Guinea.\", \"prediction\": 0.5819137692451477, \"seed\": \"alternative\"}, {\"Country\": \"Samoa\", \"Continent\": \"Oceania\", \"text\": \"This film was filmed in Samoa.\", \"prediction\": 0.9161322712898254, \"seed\": \"alternative\"}, {\"Country\": \"Solomon Islands\", \"Continent\": \"Oceania\", \"text\": \"This film was filmed in Solomon Islands.\", \"prediction\": 0.9441730976104736, \"seed\": \"alternative\"}, {\"Country\": \"Tonga\", \"Continent\": \"Oceania\", \"text\": \"This film was filmed in Tonga.\", \"prediction\": 0.550994873046875, \"seed\": \"alternative\"}, {\"Country\": \"Tuvalu\", \"Continent\": \"Oceania\", \"text\": \"This film was filmed in Tuvalu.\", \"prediction\": 0.9912257790565491, \"seed\": \"alternative\"}, {\"Country\": \"Vanuatu\", \"Continent\": \"Oceania\", \"text\": \"This film was filmed in Vanuatu.\", \"prediction\": 0.9395317435264587, \"seed\": \"alternative\"}, {\"Country\": \"Argentina\", \"Continent\": \"South America\", \"text\": \"This film was filmed in Argentina.\", \"prediction\": 0.9719653129577637, 
\"seed\": \"alternative\"}, {\"Country\": \"Bolivia\", \"Continent\": \"South America\", \"text\": \"This film was filmed in Bolivia.\", \"prediction\": 0.8009489178657532, \"seed\": \"alternative\"}, {\"Country\": \"Brazil\", \"Continent\": \"South America\", \"text\": \"This film was filmed in Brazil.\", \"prediction\": 0.968963086605072, \"seed\": \"alternative\"}, {\"Country\": \"Chile\", \"Continent\": \"South America\", \"text\": \"This film was filmed in Chile.\", \"prediction\": 0.8917940258979797, \"seed\": \"alternative\"}, {\"Country\": \"Colombia\", \"Continent\": \"South America\", \"text\": \"This film was filmed in Colombia.\", \"prediction\": 0.731931746006012, \"seed\": \"alternative\"}, {\"Country\": \"Ecuador\", \"Continent\": \"South America\", \"text\": \"This film was filmed in Ecuador.\", \"prediction\": 0.845059335231781, \"seed\": \"alternative\"}, {\"Country\": \"Guyana\", \"Continent\": \"South America\", \"text\": \"This film was filmed in Guyana.\", \"prediction\": 0.6705957055091858, \"seed\": \"alternative\"}, {\"Country\": \"Paraguay\", \"Continent\": \"South America\", \"text\": \"This film was filmed in Paraguay.\", \"prediction\": 0.6165609359741211, \"seed\": \"alternative\"}, {\"Country\": \"Peru\", \"Continent\": \"South America\", \"text\": \"This film was filmed in Peru.\", \"prediction\": 0.7860054969787598, \"seed\": \"alternative\"}, {\"Country\": \"Suriname\", \"Continent\": \"South America\", \"text\": \"This film was filmed in Suriname.\", \"prediction\": 0.9488070607185364, \"seed\": \"alternative\"}, {\"Country\": \"Uruguay\", \"Continent\": \"South America\", \"text\": \"This film was filmed in Uruguay.\", \"prediction\": 0.744226336479187, \"seed\": \"alternative\"}, {\"Country\": \"Venezuela\", \"Continent\": \"South America\", \"text\": \"This film was filmed in Venezuela.\", \"prediction\": 0.8343830108642578, \"seed\": \"alternative\"}]}}, {\"mode\": \"vega-lite\"});\n",
688
+ "</script>"
689
+ ],
690
+ "text/plain": [
691
+ "alt.Chart(...)"
692
+ ]
693
+ },
694
+ "execution_count": 16,
695
+ "metadata": {},
696
+ "output_type": "execute_result"
697
+ }
698
+ ],
699
+ "source": [
700
+ "single_nearest = alt.selection_single(on='mouseover', nearest=True)\n",
701
+ "full = alt.Chart(df).encode(\n",
702
+ " alt.X('Continent:N'), # specify nominal data\n",
703
+ " alt.Y('prediction:Q'), # specify quantitative data\n",
704
+ " color=alt.Color('seed:N', legend=alt.Legend(title=\"Seed or Alternative\")),\n",
705
+ " size='seed:N',\n",
706
+ " tooltip=('Country','prediction')\n",
707
+ ").mark_circle(opacity=.5).properties(width=300).add_selection(single_nearest)\n",
708
+ "\n",
709
+ "full"
710
+ ]
711
+ },
712
+ {
713
+ "cell_type": "code",
714
+ "execution_count": 17,
715
+ "id": "56bc30d7-03a5-43ff-9dfe-878197628305",
716
+ "metadata": {},
717
+ "outputs": [],
718
+ "source": [
719
+ "df2 = df.nlargest(5, 'prediction')\n",
720
+ "df3 = df.nsmallest(5, 'prediction')\n",
721
+ "frames = [df2,df3]\n",
722
+ "results = pd.concat(frames)"
723
+ ]
724
+ },
725
+ {
726
+ "cell_type": "code",
727
+ "execution_count": 18,
728
+ "id": "1610bb48-c9b9-4bee-bcb5-999886acb9e3",
729
+ "metadata": {},
730
+ "outputs": [
731
+ {
732
+ "data": {
733
+ "text/html": [
734
+ "\n",
735
+ "<div id=\"altair-viz-948f4471f5ee4ed8bb2720ca7dd085a7\"></div>\n",
736
+ "<script type=\"text/javascript\">\n",
737
+ " var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
738
+ " (function(spec, embedOpt){\n",
739
+ " let outputDiv = document.currentScript.previousElementSibling;\n",
740
+ " if (outputDiv.id !== \"altair-viz-948f4471f5ee4ed8bb2720ca7dd085a7\") {\n",
741
+ " outputDiv = document.getElementById(\"altair-viz-948f4471f5ee4ed8bb2720ca7dd085a7\");\n",
742
+ " }\n",
743
+ " const paths = {\n",
744
+ " \"vega\": \"https://cdn.jsdelivr.net/npm//vega@5?noext\",\n",
745
+ " \"vega-lib\": \"https://cdn.jsdelivr.net/npm//vega-lib?noext\",\n",
746
+ " \"vega-lite\": \"https://cdn.jsdelivr.net/npm//vega-lite@4.17.0?noext\",\n",
747
+ " \"vega-embed\": \"https://cdn.jsdelivr.net/npm//vega-embed@6?noext\",\n",
748
+ " };\n",
749
+ "\n",
750
+ " function maybeLoadScript(lib, version) {\n",
751
+ " var key = `${lib.replace(\"-\", \"\")}_version`;\n",
752
+ " return (VEGA_DEBUG[key] == version) ?\n",
753
+ " Promise.resolve(paths[lib]) :\n",
754
+ " new Promise(function(resolve, reject) {\n",
755
+ " var s = document.createElement('script');\n",
756
+ " document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
757
+ " s.async = true;\n",
758
+ " s.onload = () => {\n",
759
+ " VEGA_DEBUG[key] = version;\n",
760
+ " return resolve(paths[lib]);\n",
761
+ " };\n",
762
+ " s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n",
763
+ " s.src = paths[lib];\n",
764
+ " });\n",
765
+ " }\n",
766
+ "\n",
767
+ " function showError(err) {\n",
768
+ " outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n",
769
+ " throw err;\n",
770
+ " }\n",
771
+ "\n",
772
+ " function displayChart(vegaEmbed) {\n",
773
+ " vegaEmbed(outputDiv, spec, embedOpt)\n",
774
+ " .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n",
775
+ " }\n",
776
+ "\n",
777
+ " if(typeof define === \"function\" && define.amd) {\n",
778
+ " requirejs.config({paths});\n",
779
+ " require([\"vega-embed\"], displayChart, err => showError(`Error loading script: ${err.message}`));\n",
780
+ " } else {\n",
781
+ " maybeLoadScript(\"vega\", \"5\")\n",
782
+ " .then(() => maybeLoadScript(\"vega-lite\", \"4.17.0\"))\n",
783
+ " .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n",
784
+ " .catch(showError)\n",
785
+ " .then(() => displayChart(vegaEmbed));\n",
786
+ " }\n",
787
+ " })({\"config\": {\"view\": {\"continuousWidth\": 400, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-09f850c452d77d8e274c73526803ae5c\"}, \"mark\": \"circle\", \"encoding\": {\"color\": {\"field\": \"seed\", \"legend\": {\"title\": \"Seed or Alternative\"}, \"type\": \"nominal\"}, \"size\": {\"field\": \"seed\", \"type\": \"nominal\"}, \"tooltip\": [{\"field\": \"Country\", \"type\": \"nominal\"}, {\"field\": \"prediction\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"prediction\", \"type\": \"quantitative\"}, \"y\": {\"field\": \"Country\", \"sort\": \"-x\", \"type\": \"nominal\"}}, \"selection\": {\"selector002\": {\"type\": \"single\", \"on\": \"mouseover\", \"nearest\": true}}, \"width\": 300, \"$schema\": \"https://vega.github.io/schema/vega-lite/v4.17.0.json\", \"datasets\": {\"data-09f850c452d77d8e274c73526803ae5c\": [{\"Country\": \"Monaco\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Monaco.\", \"prediction\": 0.9971835017204285, \"seed\": \"alternative\"}, {\"Country\": \"Tuvalu\", \"Continent\": \"Oceania\", \"text\": \"This film was filmed in Tuvalu.\", \"prediction\": 0.9912257790565491, \"seed\": \"alternative\"}, {\"Country\": \"Philippines\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Philippines.\", \"prediction\": 0.9892238974571228, \"seed\": \"alternative\"}, {\"Country\": \"Trinidad and Tobago\", \"Continent\": \"North America\", \"text\": \"This film was filmed in Trinidad and Tobago.\", \"prediction\": 0.9884806871414185, \"seed\": \"alternative\"}, {\"Country\": \"Albania\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in Albania.\", \"prediction\": 0.9874222278594971, \"seed\": \"alternative\"}, {\"Country\": \"Iraq\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Iraq.\", \"prediction\": -0.9768388867378235, \"seed\": \"seed\"}, {\"Country\": \"CZ\", \"Continent\": \"Europe\", \"text\": \"This film was filmed in CZ.\", \"prediction\": 
-0.9620359539985657, \"seed\": \"alternative\"}, {\"Country\": \"Vietnam\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Vietnam.\", \"prediction\": -0.9427406191825867, \"seed\": \"alternative\"}, {\"Country\": \"Sudan\", \"Continent\": \"Africa\", \"text\": \"This film was filmed in Sudan.\", \"prediction\": -0.8910807967185974, \"seed\": \"alternative\"}, {\"Country\": \"Syria\", \"Continent\": \"Asia\", \"text\": \"This film was filmed in Syria.\", \"prediction\": -0.8887014985084534, \"seed\": \"alternative\"}]}}, {\"mode\": \"vega-lite\"});\n",
788
+ "</script>"
789
+ ],
790
+ "text/plain": [
791
+ "alt.Chart(...)"
792
+ ]
793
+ },
794
+ "execution_count": 18,
795
+ "metadata": {},
796
+ "output_type": "execute_result"
797
+ }
798
+ ],
799
+ "source": [
800
+ "bar = alt.Chart(results).encode( \n",
801
+ " alt.X('prediction:Q'), \n",
802
+ " alt.Y('Country:N', sort=\"-x\"),\n",
803
+ " color=alt.Color('seed:N', legend=alt.Legend(title=\"Seed or Alternative\")),\n",
804
+ " size='seed:N',\n",
805
+ " tooltip=('Country','prediction')\n",
806
+ ").mark_circle().properties(width=300).add_selection(single_nearest)\n",
807
+ "\n",
808
+ "bar"
809
+ ]
810
+ },
811
+ {
812
+ "cell_type": "code",
813
+ "execution_count": 34,
814
+ "id": "96cd0798-5ac5-4ede-8373-e8ed71ab07b3",
815
+ "metadata": {},
816
+ "outputs": [],
817
+ "source": [
818
+ "def critical_words(document, options=False):\n",
819
+ " '''This function is meant to select the critical part of a sentence. Critical, in this context means\n",
820
+ " the part of the sentence that is either: A) a PROPN from the correct entity group; B) an ADJ associated with a NOUN;\n",
821
+ " C) a NOUN that represents gender. It also checks this against what the model thinks is important if the user defines \"options\" as \"LIME\" or True.'''\n",
822
+ " if type(document) is not spacy.tokens.doc.Doc:\n",
823
+ " document = nlp(document)\n",
824
+ " chunks = list(document.noun_chunks)\n",
825
+ " pos_options = []\n",
826
+ " lime_options = []\n",
827
+ " \n",
828
+ " #Identify what the model cares about.\n",
829
+ " if options:\n",
830
+ " exp = explainer.explain_instance(document.text, predictor, num_features=15, num_samples=2000)\n",
831
+ " lime_results = exp.as_list()\n",
832
+ " for feature in lime_results:\n",
833
+ " lime_options.append(feature[0])\n",
834
+ " lime_results = pd.DataFrame(lime_results, columns=[\"Word\",\"Weight\"])\n",
835
+ " \n",
836
+ " #Identify what we care about \"parts of speech\". The first section focuses on NOUNs and related ADJ.\n",
837
+ " for chunk in chunks:\n",
838
+ " #The use of chunk[-1] is due to testing that it appears to always match the root\n",
839
+ " root = chunk[-1]\n",
840
+ " #This currently matches to a list I've created. I don't know the best way to deal with this so I'm leaving it as is for the moment.\n",
841
+ " if root.ent_type_:\n",
842
+ " cur_values = []\n",
843
+ " if (len(chunk) > 1) and (chunk[-2].dep_ == \"compound\"):\n",
844
+ " #creates the compound element of the noun\n",
845
+ " compound = [x.text for x in chunk if x.dep_ == \"compound\"]\n",
846
+ " print(f\"This is the contents of {compound} and it is {all(elem in lime_options for elem in compound)} that all elements are present in {lime_options}.\") #for QA\n",
847
+ " #checks to see all elements in the compound are important to the model or use the compound if not checking importance.\n",
848
+ " if (all(elem in lime_options for elem in cur_values) and (options is True)) or ((options is False)):\n",
849
+ " #creates a span for the entirety of the compound noun and adds it to the list.\n",
850
+ " span = -1 * (1 + len(compound))\n",
851
+ " pos_options.append(chunk[span:].text)\n",
852
+ " cur_values + [token.text for token in chunk if token.pos_ == \"ADJ\"]\n",
853
+ " else:\n",
854
+ " print(f\"The elmenents in {compound} could not be added to the final list because they are not all relevant to the model.\")\n",
855
+ " else: \n",
856
+ " cur_values = [token.text for token in chunk if (token.ent_type_) or (token.pos_ == \"ADJ\")]\n",
857
+ " if (all(elem in lime_options for elem in cur_values) and (options is True)) or ((options is False)):\n",
858
+ " pos_options.extend(cur_values)\n",
859
+ " print(f\"From {chunk.text}, {cur_values} added to pos_options due to entity recognition.\") #for QA\n",
860
+ " elif len(chunk) >= 1:\n",
861
+ " cur_values = [token.text for token in chunk if token.pos_ in [\"NOUN\",\"ADJ\"]]\n",
862
+ " if (all(elem in lime_options for elem in cur_values) and (options is True)) or ((options is False)):\n",
863
+ " pos_options.extend(cur_values)\n",
864
+ " print(f\"From {chunk.text}, {cur_values} added to pos_options due to wildcard.\") #for QA\n",
865
+ " else:\n",
866
+ " print(f\"No options added for \\'{chunk.text}\\' \")\n",
867
+ " # Here I am going to try to pick up pronouns, which are people, and Adjectival Complements.\n",
868
+ " for token in document:\n",
869
+ " if (token.text not in pos_options) and ((token.text in lime_options) or (options == False)):\n",
870
+ " #print(f\"executed {token.text} with {token.pos_} and {token.dep_}\") #QA\n",
871
+ " if (token.pos_ == \"ADJ\") and (token.dep_ in [\"acomp\",\"conj\"]):\n",
872
+ " pos_options.append(token.text) \n",
873
+ " elif (token.pos_ == \"PRON\") and (token.morph.get(\"PronType\")[0] == \"Prs\"):\n",
874
+ " pos_options.append(token.text)\n",
875
+ " \n",
876
+ " #Return the correct set of options based on user input, defaults to POS for simplicity.\n",
877
+ " if options:\n",
878
+ " return pos_options, lime_results\n",
879
+ " else:\n",
880
+ " return pos_options"
881
+ ]
882
+ },
883
+ {
884
+ "cell_type": "code",
885
+ "execution_count": 20,
886
+ "id": "b04e7783-e51b-49b0-8165-afe1d5a1c576",
887
+ "metadata": {},
888
+ "outputs": [],
889
+ "source": [
890
+ "#Testing new code\n",
891
+ "a = \"People are fat and lazy.\"\n",
892
+ "b = \"I think she is beautiful.\"\n",
893
+ "doca = nlp(a)\n",
894
+ "docb = nlp(b)"
895
+ ]
896
+ },
897
+ {
898
+ "cell_type": "code",
899
+ "execution_count": 21,
900
+ "id": "0a6bc521-9282-41ad-82c9-29e447d77635",
901
+ "metadata": {},
902
+ "outputs": [
903
+ {
904
+ "name": "stdout",
905
+ "output_type": "stream",
906
+ "text": [
907
+ "No options added for 'People' \n"
908
+ ]
909
+ },
910
+ {
911
+ "data": {
912
+ "text/plain": [
913
+ "['fat', 'lazy']"
914
+ ]
915
+ },
916
+ "execution_count": 21,
917
+ "metadata": {},
918
+ "output_type": "execute_result"
919
+ }
920
+ ],
921
+ "source": [
922
+ "optsa, limea = critical_words(doca, True)\n",
923
+ "optsa"
924
+ ]
925
+ },
926
+ {
927
+ "cell_type": "code",
928
+ "execution_count": 22,
929
+ "id": "042e94d3-65a5-4a20-b69a-96ec3296d7d4",
930
+ "metadata": {},
931
+ "outputs": [],
932
+ "source": [
933
+ "def lime_viz(df):\n",
934
+ " single_nearest = alt.selection_single(on='mouseover', nearest=True)\n",
935
+ " viz = alt.Chart(df).encode(\n",
936
+ " alt.X('Weight:Q', scale=alt.Scale(domain=(-1, 1))),\n",
937
+ " alt.Y('Word:N', sort='x', axis=None),\n",
938
+ " color=alt.Color(\"Weight\", scale=alt.Scale(scheme='blueorange', domain=[0], type=\"threshold\", range='diverging'), legend=None),\n",
939
+ " tooltip = (\"Word\",\"Weight\")\n",
940
+ " ).mark_bar().properties(title =\"Importance of individual words\")\n",
941
+ "\n",
942
+ " text = viz.mark_text(\n",
943
+ " fill=\"black\",\n",
944
+ " align='right',\n",
945
+ " baseline='middle'\n",
946
+ " ).encode(\n",
947
+ " text='Word:N'\n",
948
+ " )\n",
949
+ " limeplot = alt.LayerChart(layer=[viz,text], width = 300).configure_axis(grid=False).configure_view(strokeWidth=0)\n",
950
+ " return limeplot"
951
+ ]
952
+ },
953
+ {
954
+ "cell_type": "code",
955
+ "execution_count": 23,
956
+ "id": "924eeea8-1d5d-4fe7-8308-164521919269",
957
+ "metadata": {},
958
+ "outputs": [
959
+ {
960
+ "name": "stdout",
961
+ "output_type": "stream",
962
+ "text": [
963
+ "No options added for 'I' \n",
964
+ "From a white woman, ['white', 'woman'] added to pos_options due to wildcard.\n",
965
+ "From the street, ['street'] added to pos_options due to wildcard.\n",
966
+ "From an asian man, ['asian', 'man'] added to pos_options due to wildcard.\n"
967
+ ]
968
+ },
969
+ {
970
+ "data": {
971
+ "text/plain": [
972
+ "['white', 'woman', 'street', 'asian', 'man', 'I']"
973
+ ]
974
+ },
975
+ "execution_count": 23,
976
+ "metadata": {},
977
+ "output_type": "execute_result"
978
+ }
979
+ ],
980
+ "source": [
981
+ "test8 = \"I saw a white woman walking down the street with an asian man.\"\n",
982
+ "opts8, lime8 = critical_words(test8,True)\n",
983
+ "opts8"
984
+ ]
985
+ },
986
+ {
987
+ "cell_type": "code",
988
+ "execution_count": 24,
989
+ "id": "734366df-ad99-4d80-87e1-51793e150681",
990
+ "metadata": {},
991
+ "outputs": [
992
+ {
993
+ "data": {
994
+ "text/html": [
995
+ "\n",
996
+ "<div id=\"altair-viz-adaa380d0d924bb594dd3aaee854acfd\"></div>\n",
997
+ "<script type=\"text/javascript\">\n",
998
+ " var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
999
+ " (function(spec, embedOpt){\n",
1000
+ " let outputDiv = document.currentScript.previousElementSibling;\n",
1001
+ " if (outputDiv.id !== \"altair-viz-adaa380d0d924bb594dd3aaee854acfd\") {\n",
1002
+ " outputDiv = document.getElementById(\"altair-viz-adaa380d0d924bb594dd3aaee854acfd\");\n",
1003
+ " }\n",
1004
+ " const paths = {\n",
1005
+ " \"vega\": \"https://cdn.jsdelivr.net/npm//vega@5?noext\",\n",
1006
+ " \"vega-lib\": \"https://cdn.jsdelivr.net/npm//vega-lib?noext\",\n",
1007
+ " \"vega-lite\": \"https://cdn.jsdelivr.net/npm//vega-lite@4.17.0?noext\",\n",
1008
+ " \"vega-embed\": \"https://cdn.jsdelivr.net/npm//vega-embed@6?noext\",\n",
1009
+ " };\n",
1010
+ "\n",
1011
+ " function maybeLoadScript(lib, version) {\n",
1012
+ " var key = `${lib.replace(\"-\", \"\")}_version`;\n",
1013
+ " return (VEGA_DEBUG[key] == version) ?\n",
1014
+ " Promise.resolve(paths[lib]) :\n",
1015
+ " new Promise(function(resolve, reject) {\n",
1016
+ " var s = document.createElement('script');\n",
1017
+ " document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
1018
+ " s.async = true;\n",
1019
+ " s.onload = () => {\n",
1020
+ " VEGA_DEBUG[key] = version;\n",
1021
+ " return resolve(paths[lib]);\n",
1022
+ " };\n",
1023
+ " s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n",
1024
+ " s.src = paths[lib];\n",
1025
+ " });\n",
1026
+ " }\n",
1027
+ "\n",
1028
+ " function showError(err) {\n",
1029
+ " outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n",
1030
+ " throw err;\n",
1031
+ " }\n",
1032
+ "\n",
1033
+ " function displayChart(vegaEmbed) {\n",
1034
+ " vegaEmbed(outputDiv, spec, embedOpt)\n",
1035
+ " .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n",
1036
+ " }\n",
1037
+ "\n",
1038
+ " if(typeof define === \"function\" && define.amd) {\n",
1039
+ " requirejs.config({paths});\n",
1040
+ " require([\"vega-embed\"], displayChart, err => showError(`Error loading script: ${err.message}`));\n",
1041
+ " } else {\n",
1042
+ " maybeLoadScript(\"vega\", \"5\")\n",
1043
+ " .then(() => maybeLoadScript(\"vega-lite\", \"4.17.0\"))\n",
1044
+ " .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n",
1045
+ " .catch(showError)\n",
1046
+ " .then(() => displayChart(vegaEmbed));\n",
1047
+ " }\n",
1048
+ " })({\"config\": {\"view\": {\"continuousWidth\": 400, \"continuousHeight\": 300, \"strokeWidth\": 0}, \"axis\": {\"grid\": false}}, \"layer\": [{\"mark\": \"bar\", \"encoding\": {\"color\": {\"field\": \"Weight\", \"legend\": null, \"scale\": {\"domain\": [0], \"range\": \"diverging\", \"scheme\": \"blueorange\", \"type\": \"threshold\"}, \"type\": \"quantitative\"}, \"tooltip\": [{\"field\": \"Word\", \"type\": \"nominal\"}, {\"field\": \"Weight\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"Weight\", \"scale\": {\"domain\": [-1, 1]}, \"type\": \"quantitative\"}, \"y\": {\"axis\": null, \"field\": \"Word\", \"sort\": \"x\", \"type\": \"nominal\"}}, \"title\": \"Importance of individual words\"}, {\"mark\": {\"type\": \"text\", \"align\": \"right\", \"baseline\": \"middle\", \"fill\": \"black\"}, \"encoding\": {\"color\": {\"field\": \"Weight\", \"legend\": null, \"scale\": {\"domain\": [0], \"range\": \"diverging\", \"scheme\": \"blueorange\", \"type\": \"threshold\"}, \"type\": \"quantitative\"}, \"text\": {\"field\": \"Word\", \"type\": \"nominal\"}, \"tooltip\": [{\"field\": \"Word\", \"type\": \"nominal\"}, {\"field\": \"Weight\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"Weight\", \"scale\": {\"domain\": [-1, 1]}, \"type\": \"quantitative\"}, \"y\": {\"axis\": null, \"field\": \"Word\", \"sort\": \"x\", \"type\": \"nominal\"}}, \"title\": \"Importance of individual words\"}], \"data\": {\"name\": \"data-d686d7fc533c26b0bdc6066e4351f840\"}, \"width\": 300, \"$schema\": \"https://vega.github.io/schema/vega-lite/v4.17.0.json\", \"datasets\": {\"data-d686d7fc533c26b0bdc6066e4351f840\": [{\"Word\": \"with\", \"Weight\": 0.3289028288853927}, {\"Word\": \"woman\", \"Weight\": -0.26094440033196564}, {\"Word\": \"asian\", \"Weight\": 0.24561077002890458}, {\"Word\": \"walking\", \"Weight\": 0.19194218998931795}, {\"Word\": \"white\", \"Weight\": -0.14942503537339621}, {\"Word\": \"down\", \"Weight\": -0.14547403123420313}, {\"Word\": \"the\", 
\"Weight\": 0.14096934306553166}, {\"Word\": \"I\", \"Weight\": -0.08672932329874143}, {\"Word\": \"street\", \"Weight\": 0.06704680513000527}, {\"Word\": \"a\", \"Weight\": -0.03171807940472653}, {\"Word\": \"an\", \"Weight\": -0.006746730007490843}, {\"Word\": \"saw\", \"Weight\": 0.0019276122088497296}, {\"Word\": \"man\", \"Weight\": -0.0005652423244728638}]}}, {\"mode\": \"vega-lite\"});\n",
1049
+ "</script>"
1050
+ ],
1051
+ "text/plain": [
1052
+ "alt.LayerChart(...)"
1053
+ ]
1054
+ },
1055
+ "execution_count": 24,
1056
+ "metadata": {},
1057
+ "output_type": "execute_result"
1058
+ }
1059
+ ],
1060
+ "source": [
1061
+ "lime_viz(lime8)"
1062
+ ]
1063
+ },
1064
+ {
1065
+ "cell_type": "code",
1066
+ "execution_count": 25,
1067
+ "id": "816e1c4b-7f02-41b1-b430-2f3750ae6c4a",
1068
+ "metadata": {},
1069
+ "outputs": [
1070
+ {
1071
+ "name": "stdout",
1072
+ "output_type": "stream",
1073
+ "text": [
1074
+ "No options added for 'I' \n",
1075
+ "From a white woman, ['white', 'woman'] added to pos_options due to wildcard.\n",
1076
+ "From the street, ['street'] added to pos_options due to wildcard.\n",
1077
+ "From an asian man, ['asian', 'man'] added to pos_options due to wildcard.\n"
1078
+ ]
1079
+ }
1080
+ ],
1081
+ "source": [
1082
+ "probability, sentiment = eval_pred_test(test8, return_all=True)\n",
1083
+ "options, lime = critical_words(test8,options=True)"
1084
+ ]
1085
+ },
1086
+ {
1087
+ "cell_type": "code",
1088
+ "execution_count": 38,
1089
+ "id": "a437a4eb-73b3-4b3c-a719-8dde2ad6dd3c",
1090
+ "metadata": {},
1091
+ "outputs": [
1092
+ {
1093
+ "name": "stdout",
1094
+ "output_type": "stream",
1095
+ "text": [
1096
+ "From I, [] added to pos_options due to wildcard.\n",
1097
+ "From men, ['men'] added to pos_options due to wildcard.\n",
1098
+ "From women, ['women'] added to pos_options due to wildcard.\n",
1099
+ "From the same respect, ['same', 'respect'] added to pos_options due to wildcard.\n"
1100
+ ]
1101
+ }
1102
+ ],
1103
+ "source": [
1104
+ "bug = \"I find men and women deserve the same respect.\"\n",
1105
+ "options = critical_words(bug)"
1106
+ ]
1107
+ },
1108
+ {
1109
+ "cell_type": "code",
1110
+ "execution_count": 29,
1111
+ "id": "8676defd-0908-4218-a1d6-218de3fb7119",
1112
+ "metadata": {},
1113
+ "outputs": [],
1114
+ "source": [
1115
+ "bug_doc = nlp(bug)"
1116
+ ]
1117
+ },
1118
+ {
1119
+ "cell_type": "code",
1120
+ "execution_count": 35,
1121
+ "id": "21b9e39b-2fcd-4c6f-8fe6-0d571cd79cca",
1122
+ "metadata": {},
1123
+ "outputs": [
1124
+ {
1125
+ "name": "stdout",
1126
+ "output_type": "stream",
1127
+ "text": [
1128
+ "I\n",
1129
+ "PRON\n",
1130
+ "a man\n",
1131
+ "NOUN\n",
1132
+ "woman\n",
1133
+ "NOUN\n",
1134
+ "the same respect\n",
1135
+ "NOUN\n"
1136
+ ]
1137
+ }
1138
+ ],
1139
+ "source": [
1140
+ "for chunk in bug_doc.noun_chunks:\n",
1141
+ " print(chunk.text)\n",
1142
+ " print(chunk[-1].pos_)"
1143
+ ]
1144
+ },
1145
+ {
1146
+ "cell_type": "code",
1147
+ "execution_count": null,
1148
+ "id": "38279d2d-e763-4329-a65e-1a67d6f5ebb8",
1149
+ "metadata": {},
1150
+ "outputs": [],
1151
+ "source": []
1152
+ }
1153
+ ],
1154
+ "metadata": {
1155
+ "kernelspec": {
1156
+ "display_name": "Python 3 (ipykernel)",
1157
+ "language": "python",
1158
+ "name": "python3"
1159
+ },
1160
+ "language_info": {
1161
+ "codemirror_mode": {
1162
+ "name": "ipython",
1163
+ "version": 3
1164
+ },
1165
+ "file_extension": ".py",
1166
+ "mimetype": "text/x-python",
1167
+ "name": "python",
1168
+ "nbconvert_exporter": "python",
1169
+ "pygments_lexer": "ipython3",
1170
+ "version": "3.8.8"
1171
+ }
1172
+ },
1173
+ "nbformat": 4,
1174
+ "nbformat_minor": 5
1175
+ }
WNgen.py ADDED
@@ -0,0 +1,313 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #Import necessary libraries.
2
+ import re, nltk, pandas as pd, numpy as np, ssl, streamlit as st
3
+ from nltk.corpus import wordnet
4
+ import spacy
5
+ nlp = spacy.load("en_core_web_lg")
6
+
7
+ #Import necessary parts for predicting things.
8
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
9
+ import torch
10
+ import torch.nn.functional as F
11
+ tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
12
+ model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
13
+ pipe = TextClassificationPipeline(model=model, tokenizer=tokenizer, return_all_scores=True)
14
+
15
+ #If an error is thrown that the corpus "omw-1.4" isn't discoverable you can use this code. (https://stackoverflow.com/questions/38916452/nltk-download-ssl-certificate-verify-failed)
16
+ '''try:
17
+ _create_unverified_https_context = ssl._create_unverified_context
18
+ except AttributeError:
19
+ pass
20
+ else:
21
+ ssl._create_default_https_context = _create_unverified_https_context
22
+
23
+ nltk.download('omw-1.4')'''
24
+
25
+ # A simple function to pull synonyms and antonyms using spacy's POS
26
def syn_ant(word, POS=False, human=True):
    """Collect WordNet synonyms and antonyms for ``word``.

    Args:
        word: Word or phrase to look up. Spaces are replaced with
            underscores before querying WordNet.
        POS: One of 'NOUN', 'VERB', 'ADJ' or 'ADV' to restrict the lookup to
            that part of speech. Any other value (including the default
            ``False``) searches every part of speech.
        human: When true, underscores in the returned lemma names are
            replaced with spaces.

    Returns:
        A ``(synonyms, antonyms)`` tuple of de-duplicated lists, each in
        first-seen order so repeated calls give the same ordering.
    """
    pos_options = ('NOUN', 'VERB', 'ADJ', 'ADV')
    # WordNet hates spaces so you have to remove them.
    word = word.replace(" ", "_")
    # pos=None asks WordNet for every part of speech, which lets one loop
    # serve both the filtered and the unfiltered lookup.
    pos = getattr(wordnet, POS) if POS in pos_options else None

    def readable(name):
        return name.replace("_", " ") if human else name

    # dict keys de-duplicate while keeping insertion order (unlike a set).
    synonyms = {}
    antonyms = {}
    for synset in wordnet.synsets(word, pos=pos):
        for lemma in synset.lemmas():
            synonyms[readable(lemma.name())] = None
            for antonym in lemma.antonyms():
                antonyms[readable(antonym.name())] = None
    return list(synonyms), list(antonyms)
63
+
64
def process_text(text):
    """Return the lemmas of the lower-cased ``text`` joined by single spaces.

    The text is parsed with the module-level ``nlp`` pipeline. Stop words,
    punctuation and tokens whose lemma is '-PRON-' are left out.
    """
    kept_lemmas = [
        tok.lemma_
        for tok in nlp(text.lower())
        if not (tok.is_stop or tok.is_punct or tok.lemma_ == '-PRON-')
    ]
    return " ".join(kept_lemmas)
72
+
73
def clean_definition(syn):
    """Return a spaCy doc of a synset's definition with stop words removed.

    Stripping stop words (via process_text) is meant to sharpen document-level
    similarity between definitions.

    Args:
        syn: Either a WordNet synset name that ``wordnet.synset`` accepts
            or an NLTK ``Synset`` object.

    Returns:
        The result of ``nlp`` applied to the cleaned definition text.

    Raises:
        TypeError: If ``syn`` is neither a string nor a ``Synset``.
    """
    if isinstance(syn, str):
        definition_text = wordnet.synset(syn).definition()
    elif isinstance(syn, nltk.corpus.reader.wordnet.Synset):
        definition_text = syn.definition()
    else:
        # Previously fell through and failed on an unbound local further down.
        raise TypeError(
            f"clean_definition expects a synset name or Synset, got {type(syn).__name__}"
        )
    return nlp(process_text(definition_text))
81
+
82
def check_sim(a, b):
    """Return the similarity between ``a`` and ``b``.

    Args:
        a: A string, or an object exposing ``similarity(other)``.
        b: A string, or an object accepted by ``a.similarity``.

    Returns:
        Whatever ``a.similarity(b)`` returns after any string argument has
        been parsed with ``nlp``.
    """
    # Convert each side on its own: a (str, Doc) pair used to skip conversion
    # entirely and then fail because str has no .similarity().
    if isinstance(a, str):
        a = nlp(a)
    if isinstance(b, str):
        b = nlp(b)
    return a.similarity(b)
88
+
89
+ # Builds a dataframe dynamically from WordNet using NLTK.
90
def _append_similar_lemmas(synsets, seed_doc, threshold, cats, words):
    """Append a (category, word) row for every lemma of each synset whose definition scores above ``threshold``."""
    for syn in synsets:
        if seed_doc.similarity(nlp(process_text(syn.definition()))) > threshold:
            category = syn.name().split(".")[0].replace("_", " ")
            # Copy before extending: NLTK returns the synset's own lemma list
            # for English, so extending it in place would alter the cached
            # synset and make later calls see hyponym lemmas as its own.
            lemmas = list(syn.lemmas())
            for hypo in syn.hyponyms():
                lemmas.extend(hypo.lemmas())
            for lemma in lemmas:
                cats.append(category)
                words.append(lemma.name().replace("_", " "))


def wordnet_df(word, POS=False, seed_definition=None):
    """Build a dataframe of WordNet words related to ``word``.

    Synsets of the word itself, of its synonyms and of its antonyms are kept
    when their definition is similar enough to ``seed_definition`` (thresholds
    .7, .6 and .26 respectively). For each kept synset, its lemmas and the
    lemmas of its direct hyponyms are added.

    Args:
        word: Word or phrase to expand; spaces are replaced by underscores for
            the WordNet lookup.
        POS: 'NOUN', 'VERB', 'ADJ' or 'ADV' restricts every lookup to that part
            of speech; any other value searches all of them.
        seed_definition: Definition text that selects the intended sense.

    Returns:
        A DataFrame with columns "Categories" (head word of the synset a row
        came from) and "Words", duplicates removed. When no definition string
        is supplied a notice is written to Streamlit and the frame is empty.
    """
    if not isinstance(seed_definition, str):
        # The old try/except around a plain assignment could never fire, so a
        # missing definition crashed later inside process_text instead.
        st.write("You did not supply a definition.")
        return pd.DataFrame({"Categories": [], "Words": []})

    pos_options = ('NOUN', 'VERB', 'ADJ', 'ADV')
    # pos=None is the WordNet reader's "all parts of speech" default.
    wn_pos = getattr(wordnet, POS) if POS in pos_options else None
    synonyms, antonyms = syn_ant(word, POS, False)

    # Parse the seed once instead of once per candidate synset.
    seed_doc = nlp(process_text(seed_definition))

    searches = [(word, .7)]
    searches.extend((synonym, .6) for synonym in synonyms)
    searches.extend((antonym, .26) for antonym in antonyms)

    words = []
    cats = []
    for term, threshold in searches:
        synsets = wordnet.synsets(term.replace(" ", "_"), pos=wn_pos)
        _append_similar_lemmas(synsets, seed_doc, threshold, cats, words)

    df = pd.DataFrame({"Categories": cats, "Words": words})
    return df.drop_duplicates().reset_index(drop=True)
191
+
192
def eval_pred_test(text, return_all = False):
    '''Score ``text`` with the module-level pipeline and return a signed, chart-friendly number.

    The first label's score is negated and the second label's score is kept
    positive; whichever has the larger magnitude is returned. An exact tie
    yields 0 with an empty sentiment string. With ``return_all`` the winning
    label is returned alongside the number.
    '''
    first, second = pipe(text)[0][0], None
    scores = pipe_result = None  # placeholders replaced just below
    pipe_result = [first]
    del scores, pipe_result, second
    return _eval_pred_test_from(first, text, return_all)


def _eval_pred_test_from(first, text, return_all):
    '''Unused indirection guard; see eval_pred_test.'''
    raise NotImplementedError
212
+
213
def get_parallel(word, seed_definition, QA=False):
    """Find WordNet synsets that sit "parallel" to ``word``.

    Candidates are the hyponyms of every hypernym of ``word``'s synsets. They
    are first filtered at similarity >= .75 against ``seed_definition``; the
    size of that result then picks the final threshold:

        0-1 matches   -> fall back to the word's own synsets
        2 matches     -> relax to >= .5
        3-9 matches   -> relax to >= .66
        10+ matches   -> tighten to >= .8

    Every filtered list is capped at 25 synsets.

    Args:
        word: WordNet lookup key (underscores instead of spaces).
        seed_definition: Definition text selecting the intended sense.
        QA: When True, print the hypernyms and result and return both.

    Returns:
        The list of selected synsets, or ``(hypernyms, synsets)`` when ``QA``.
    """
    cleaned = nlp(process_text(seed_definition))
    root_syns = wordnet.synsets(word)

    hypers = []
    for syn in root_syns:
        hypers.extend(syn.hypernyms())

    # De-duplicate while keeping first-seen order so the 25-item cap always
    # keeps the same synsets (a plain set made the selection arbitrary).
    candidates = list(dict.fromkeys(
        hypo for hyper in hypers for hypo in hyper.hyponyms()
    ))
    # Score each candidate once; the old code re-parsed every definition for
    # each threshold pass.
    scores = {
        syn: cleaned.similarity(nlp(process_text(syn.definition())))
        for syn in candidates
    }

    def at_least(threshold):
        return [syn for syn in candidates if scores[syn] >= threshold][:25]

    hypos = at_least(.75)
    if len(hypos) <= 1:
        hypos = root_syns
    elif len(hypos) < 3:
        hypos = at_least(.5)
    elif len(hypos) < 10:
        hypos = at_least(.66)
    else:
        hypos = at_least(.8)

    if QA:
        print(hypers)
        print(hypos)
        return hypers, hypos
    return hypos
243
+
244
+ # Builds a dataframe dynamically from WordNet using NLTK.
245
def wordnet_parallel_df(word, seed_definition=None):
    """Build a dataframe of alternatives to ``word`` from WordNet.

    Rows come from three sources, in this order: the word's synonyms
    (category "synonyms"), its antonyms (category "antonyms"), and the lemmas
    of the synsets returned by get_parallel plus the lemmas of their direct
    hyponyms (category = head word of the parallel synset).

    Args:
        word: Word or phrase to expand; spaces are replaced by underscores for
            the WordNet lookup.
        seed_definition: Definition text selecting the intended sense.

    Returns:
        A DataFrame with columns "Categories" and "Words", one row per distinct
        word (the first category seen for a word wins). If the parallel-synset
        lookup fails, a notice is written to Streamlit and only the
        synonym/antonym rows are returned.
    """
    #WordNet hates spaces so you have to remove them
    m_word = word.replace(" ", "_")

    # add synonyms and antonyms for diversity
    synonyms, antonyms = syn_ant(word)
    words = list(synonyms) + list(antonyms)
    cats = ["synonyms"] * len(synonyms) + ["antonyms"] * len(antonyms)

    try:
        parallels = get_parallel(m_word, seed_definition)
    except Exception:
        st.write("You did not supply a definition.")
        # Keep going with what we have; the old code left the name unbound
        # here and crashed with NameError in the loop below.
        parallels = []

    #Transforms the parallel synsets into lemmas
    for syn in parallels:
        category = syn.name().split(".")[0].replace("_", " ")
        # Copy before extending: NLTK returns the synset's own lemma list for
        # English, so extending it in place would alter the cached synset.
        lemmas = list(syn.lemmas())
        for hypo in syn.hyponyms():
            lemmas.extend(hypo.lemmas())
        for lemma in lemmas:
            cats.append(category)
            words.append(lemma.name().replace("_", " "))

    df = pd.DataFrame({"Categories": cats, "Words": words})
    return df.drop_duplicates("Words").reset_index(drop=True)
295
+
296
+ #@st.experimental_singleton(suppress_st_warning=True)
297
def cf_from_wordnet_df(seed, text, seed_definition=False):
    """Generate counterfactual sentences by swapping ``seed`` for WordNet alternatives.

    Args:
        seed: The word in ``text`` to replace.
        text: The original sentence.
        seed_definition: Definition text selecting the intended sense of
            ``seed``; passed through to wordnet_parallel_df.

    Returns:
        A DataFrame with the columns of wordnet_parallel_df plus:
            text        - ``text`` with each whole-word occurrence of ``seed``
                          replaced by the row's word,
            similarity  - similarity between the first token of ``seed`` and
                          the first token of the row's word (rows <= 0 dropped),
            pred        - eval_pred_test score of the rewritten sentence,
            seed        - 'seed' when the row's word equals ``seed``
                          (case-insensitive), otherwise 'alternative'.
        An empty DataFrame is returned when ``seed`` has no tokens or the
        alternatives could not be built.
    """
    seed_token = nlp(seed)
    if len(seed_token) == 0:
        return pd.DataFrame()

    try:
        df = wordnet_parallel_df(seed, seed_definition)
    except Exception:
        st.write("You did not supply a definition.")
        # Callers test len(df); the old code fell through with df unbound.
        return pd.DataFrame()

    # Escape the seed so characters such as '.' or '+' match literally, and
    # substitute through a function so the replacement word is never parsed
    # as a regex template (backslashes, \1, ...).
    pattern = re.compile(r'\b' + re.escape(seed) + r'\b')
    df["text"] = df.Words.apply(lambda w: pattern.sub(lambda _m: w, text))
    df["similarity"] = df.Words.apply(lambda w: seed_token[0].similarity(nlp(w)[0]))
    df = df[df["similarity"] > 0].reset_index(drop=True)
    df["pred"] = df.text.apply(eval_pred_test)
    # Tag the seed row so it can be told apart from the generated alternatives.
    df['seed'] = df.Words.apply(lambda w: 'seed' if w.lower() == seed.lower() else 'alternative')
    return df
app.py ADDED
@@ -0,0 +1,340 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #Import the libraries we know we'll need for the Generator.
2
+ import pandas as pd, spacy, nltk, numpy as np
3
+ from spacy.matcher import Matcher
4
+ nlp = spacy.load("en_core_web_lg")
5
+
6
+ #Import the libraries to support the model and predictions.
7
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
8
+ import lime
9
+ import torch
10
+ import torch.nn.functional as F
11
+ from lime.lime_text import LimeTextExplainer
12
+
13
+ #Import the libraries for human interaction and visualization.
14
+ import altair as alt
15
+ import streamlit as st
16
+ from annotated_text import annotated_text as ant
17
+
18
+ #Import functions needed to build dataframes of keywords from WordNet
19
+ from WNgen import *
20
+ from NLselector import *
21
+
22
@st.experimental_singleton
def set_up_explainer():
    """Create the LIME text explainer for the two sentiment classes."""
    return LimeTextExplainer(class_names=['negative', 'positive'])
27
+
28
@st.experimental_singleton
def prepare_model():
    """Load the DistilBERT SST-2 checkpoint and wrap it in a classification pipeline.

    Returns:
        ``(tokenizer, model, pipe)`` where ``pipe`` is built with
        ``return_all_scores=True``.
    """
    checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
    tok = AutoTokenizer.from_pretrained(checkpoint)
    clf = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    return tok, clf, TextClassificationPipeline(model=clf, tokenizer=tok, return_all_scores=True)
34
+
35
@st.experimental_singleton
def prepare_lists():
    """Read the country and profession word lists from the Assets folder.

    Returns:
        ``(countries, professions, word_lists)`` - the two DataFrames and a
        two-element list holding each frame's ``Words`` column as a list.
    """
    country_df = pd.read_csv("Assets/Countries/combined-countries.csv")
    profession_df = pd.read_csv("Assets/Professions/soc-professions-2018.csv")
    combined = [list(frame.Words) for frame in (country_df, profession_df)]
    return country_df, profession_df, combined
41
+
42
+ #Provide all the functions necessary to run the app
43
+ #get definitions for control flow in Streamlit
44
def get_def(word, POS=False):
    """Ask the user, in column 1, which WordNet definition of ``word`` they mean.

    The definitions of every synset of ``word`` (optionally limited to the part
    of speech ``POS``: 'NOUN', 'VERB', 'ADJ' or 'ADV') are offered in a select
    box. When the "Choose Definition" button is pressed the selection is saved
    to ``st.session_state.definition`` and returned; otherwise a prompt is
    shown and None is returned.
    """
    lookup = word.replace(" ", "_")
    if POS in ['NOUN','VERB','ADJ','ADV']:
        synsets = wordnet.synsets(lookup, pos=getattr(wordnet, POS))
    else:
        synsets = wordnet.synsets(lookup)
    seed_definitions = [syn.definition() for syn in synsets]

    seed_definition = col1.selectbox("Which definition is most relevant?", seed_definitions, key= "WN_definition")
    if not col1.button("Choose Definition"):
        col1.write("Please choose a definition.")
        return None

    col1.write("You've chosen a definition.")
    st.session_state.definition = seed_definition
    return seed_definition
58
+
59
###Start coding the actual app###
# Streamlit renders widgets in the order they are called, so the statement
# order in this section determines the page layout.
st.set_page_config(layout="wide", page_title="Natural Language Counterfactuals (NLC)")
# Names of the layouts handled further down; each one gets its own `if layout == ...` section.
layouts = ['Natural Language Explanation', 'Lime Explanation', 'MultiNLC', 'MultiNLC + Lime', 'VizNLC']
# Strategies for choosing which alternatives to show in columns 2 and 3.
alternatives = ['Similarity', 'Sampling (Random)', 'Sampling (Fixed)', 'Probability']
# Default value; overwritten by the sidebar select box below.
alt_choice = "Similarity"

#Content in the Sidebar.
st.sidebar.info('This is an interface for exploring how different interfaces for exploring natural language explanations (NLE) may appear to people. It is intended to allow individuals to provide feedback on specific versions, as well as to compare what one offers over others for the same inputs.')
layout = st.sidebar.selectbox("Select a layout to explore.", layouts)
alt_choice = st.sidebar.selectbox("Choose the way you want to display alternatives.", alternatives) #Replaces the default above with the user's choice.

#Set up the Main Area Layout
st.title('Natural Language Counterfactuals (NLC) Prototype')
st.subheader(f'Current Layout: {layout}')
# key="input" also exposes the sentence as st.session_state.input, which the layouts read.
text = st.text_input('Provide a sentence you want to evaluate.', placeholder = "I like you. I love you.", key="input")

#Prepare the model, data, and Lime. Set starting variables.
# NOTE(review): WNgen builds its own tokenizer/model/pipe at import time, so the
# checkpoint appears to be loaded twice - confirm whether that is intended.
tokenizer, model, pipe = prepare_model()
countries, professions, word_lists = prepare_lists()
explainer = set_up_explainer()
# Empty defaults so the layouts can test `text2 != ""` / `len(cf_df)` before anything is generated.
text2 = ""
text3 = ""
cf_df = pd.DataFrame()
# Placeholder definition until the user picks one in get_def().
if 'definition' not in st.session_state:
    st.session_state.definition = "<(^_')>"

#Outline the various user interfaces we have built.

# col1 shows the user's sentence; col2/col3 show the generated alternatives.
col1, col2, col3 = st.columns(3)
88
#Shared building blocks for the layouts. The three counterfactual layouts
#(MultiNLC, MultiNLC + Lime, VizNLC) differ only in whether LIME charts are
#drawn and whether column 3 shows a sentence or a scatter plot.

def _show_input(explain=False, show_lime=False):
    """Render the user's sentence and its explanation in the active container.

    Args:
        explain: Run critical_words on the sentence (needed for the word
            options and for the LIME chart).
        show_lime: Draw the LIME chart under the explanation; requires
            ``explain``.

    Returns:
        The word options from critical_words, or None when ``explain`` is False.
    """
    st.caption("This is the sentence you provided.")
    st.write(text)
    probability, sentiment = eval_pred(text, return_all=True)
    options = lime = None
    if explain:
        options, lime = critical_words(st.session_state.input, options=True)
    construct_nlexp(text, sentiment, probability)
    if show_lime:
        st.write(" ")
        st.altair_chart(lime_viz(lime))
    return options


def _generate_alternatives(doc, options):
    """Let the user pick a word and, on request, build its counterfactual sentences.

    Returns:
        ``(option, cf_df)``; ``cf_df`` is empty until the button is pressed.
    """
    option = st.radio('Which word would you like to use to generate alternatives?', options, key = "option")
    if any(option in sublist for sublist in word_lists):
        st.write(f'You selected {option}. It matches a list.')
    elif option:
        st.write(f'You selected {option}. It does not match a list.')
        # Words outside the curated lists need a WordNet sense; get_def stores
        # the user's choice in st.session_state.definition.
        get_def(option)
    else:
        st.write('Awaiting your selection.')

    generated = pd.DataFrame()
    if st.button('Generate Alternatives'):
        if option in list(countries.Words):
            generated = gen_cf_country(countries, doc, option)
        elif option in list(professions.Words):
            generated = gen_cf_profession(professions, doc, option)
        else:
            with st.sidebar:
                ant("Generating alternatives for",(option,"opt","#E0FBFB"), "with a definition of: ",(st.session_state.definition,"def","#E0FBFB"),".")
            generated = cf_from_wordnet_df(option,text,seed_definition=st.session_state.definition)
        st.success('Alternatives created.')
    return option, generated


def _select_pair(cf_df, option, graph):
    """Pick the two alternative sentences for columns 2 and 3 and caption the columns.

    Args:
        cf_df: Counterfactual dataframe (non-empty).
        option: The word that was replaced.
        graph: True when column 3 shows the scatter plot instead of a sentence.

    Returns:
        ``(text2, text3)``; two empty strings if ``alt_choice`` is unrecognised.
    """
    if alt_choice == "Similarity":
        pair = get_min_max(cf_df, option)
        captions = (f"This sentence is 'similar' to {option}.",
                    f"This sentence is 'not similar' to {option}.")
    elif alt_choice == "Sampling (Random)":
        pair = sampled_alts(cf_df, option)
        captions = ("This sentence is a random sample from the alternatives.",
                    "This sentence is a random sample from the alternatives.")
    elif alt_choice == "Sampling (Fixed)":
        pair = sampled_alts(cf_df, option, fixed=True)
        captions = ("This sentence is a fixed sample of the alternatives.",
                    "This sentence is a fixed sample of the alternatives.")
    elif alt_choice == "Probability":
        pair = abs_dif(cf_df, option)
        captions = ("This sentence is the closest prediction in the model.",
                    "This sentence is the farthest prediction in the model.")
    else:
        return "", ""

    first, second = pair
    col2.caption(captions[0])
    if graph:
        # Column 3 holds the scatter plot in this layout, so describe the graph
        # whichever selection mode is active.
        col3.caption(f"This graph represents the {len(cf_df)} alternatives to {option}.")
    else:
        col3.caption(captions[1])
    return first, second


def _show_alternative(column, sentence, with_lime):
    """Render one alternative sentence, its explanation and optionally its LIME chart in ``column``."""
    with column:
        if sentence != "":
            st.write(sentence)
            probability, sentiment = eval_pred(sentence, return_all=True)
            construct_nlexp(sentence, sentiment, probability)
            if with_lime:
                exp = explainer.explain_instance(sentence, predictor, num_features=15, num_samples=2000)
                st.write(" ")
                st.altair_chart(lime_viz(exp.as_list()))


def _show_scatter(cf_df):
    """Plot similarity against prediction for every alternative in column 3."""
    with col3:
        if not cf_df.empty:
            single_nearest = alt.selection_single(on='mouseover', nearest=True)
            full = alt.Chart(cf_df).encode(
                alt.X('similarity:Q', scale=alt.Scale(zero=False)),
                alt.Y('pred:Q'),
                color=alt.Color('Categories:N', legend=alt.Legend(title="Color of Categories")),
                size=alt.Size('seed:O'),
                tooltip=('Categories','text','pred')
            ).mark_circle(opacity=.5).properties(width=450, height=450).add_selection(single_nearest)
            st.altair_chart(full)


def _counterfactual_layout(show_lime, graph):
    """Run the shared MultiNLC flow: explain the input, generate alternatives, show them."""
    option = None
    generated = pd.DataFrame()
    with col1:
        # Everything below needs a sentence; guarding the word picker as well
        # keeps it from reading options that were never computed.
        if st.session_state.input != "":
            #Use spaCy to make the sentence into a doc so we can do NLP.
            doc = nlp(st.session_state.input)
            options = _show_input(explain=True, show_lime=show_lime)
            option, generated = _generate_alternatives(doc, options)

    first, second = "", ""
    if len(generated) != 0:
        first, second = _select_pair(generated, option, graph)
        with st.sidebar:
            st.info(f"Alternatives generated: {len(generated)}")

    _show_alternative(col2, first, with_lime=show_lime)
    if graph:
        _show_scatter(generated)
    else:
        _show_alternative(col3, second, with_lime=show_lime)


if layout == 'Natural Language Explanation':
    with col1:
        if st.session_state.input != "":
            _show_input()
elif layout == 'Lime Explanation':
    with col1:
        if st.session_state.input != "":
            _show_input(explain=True, show_lime=True)
elif layout == 'MultiNLC':
    _counterfactual_layout(show_lime=False, graph=False)
elif layout == 'MultiNLC + Lime':
    _counterfactual_layout(show_lime=True, graph=False)
elif layout == 'VizNLC':
    _counterfactual_layout(show_lime=True, graph=True)