Yuchen committed on
Commit
29cb463
1 Parent(s): a6e688f
Files changed (5) hide show
  1. .gitignore +132 -0
  2. app.py +167 -0
  3. data/cons.json +283 -0
  4. data/meta.json +1134 -0
  5. utils.py +23 -0
.gitignore ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .idea
2
+ .DS_Store
3
+
4
+ # Byte-compiled / optimized / DLL files
5
+ __pycache__/
6
+ *.py[cod]
7
+ *$py.class
8
+
9
+ # C extensions
10
+ *.so
11
+
12
+ # Distribution / packaging
13
+ .Python
14
+ build/
15
+ develop-eggs/
16
+ dist/
17
+ downloads/
18
+ eggs/
19
+ .eggs/
20
+ lib/
21
+ lib64/
22
+ parts/
23
+ sdist/
24
+ var/
25
+ wheels/
26
+ pip-wheel-metadata/
27
+ share/python-wheels/
28
+ *.egg-info/
29
+ .installed.cfg
30
+ *.egg
31
+ MANIFEST
32
+
33
+ # PyInstaller
34
+ # Usually these files are written by a python script from a template
35
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
36
+ *.manifest
37
+ *.spec
38
+
39
+ # Installer logs
40
+ pip-log.txt
41
+ pip-delete-this-directory.txt
42
+
43
+ # Unit test / coverage reports
44
+ htmlcov/
45
+ .tox/
46
+ .nox/
47
+ .coverage
48
+ .coverage.*
49
+ .cache
50
+ nosetests.xml
51
+ coverage.xml
52
+ *.cover
53
+ *.py,cover
54
+ .hypothesis/
55
+ .pytest_cache/
56
+
57
+ # Translations
58
+ *.mo
59
+ *.pot
60
+
61
+ # Django stuff:
62
+ *.log
63
+ local_settings.py
64
+ db.sqlite3
65
+ db.sqlite3-journal
66
+
67
+ # Flask stuff:
68
+ instance/
69
+ .webassets-cache
70
+
71
+ # Scrapy stuff:
72
+ .scrapy
73
+
74
+ # Sphinx documentation
75
+ docs/_build/
76
+
77
+ # PyBuilder
78
+ target/
79
+
80
+ # Jupyter Notebook
81
+ .ipynb_checkpoints
82
+
83
+ # IPython
84
+ profile_default/
85
+ ipython_config.py
86
+
87
+ # pyenv
88
+ .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
98
+ __pypackages__/
99
+
100
+ # Celery stuff
101
+ celerybeat-schedule
102
+ celerybeat.pid
103
+
104
+ # SageMath parsed files
105
+ *.sage.py
106
+
107
+ # Environments
108
+ .env
109
+ .venv
110
+ env/
111
+ venv/
112
+ ENV/
113
+ env.bak/
114
+ venv.bak/
115
+
116
+ # Spyder project settings
117
+ .spyderproject
118
+ .spyproject
119
+
120
+ # Rope project settings
121
+ .ropeproject
122
+
123
+ # mkdocs documentation
124
+ /site
125
+
126
+ # mypy
127
+ .mypy_cache/
128
+ .dmypy.json
129
+ dmypy.json
130
+
131
+ # Pyre type checker
132
+ .pyre/
app.py ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import json
5
+ import altair as alt
6
+ from utils import get_variable_filter, is_numerical
7
+
8
def check_codes(var, data_meta):
    """Return True when ``var`` has a non-empty ``codes`` mapping in ``data_meta``.

    Args:
        var: Variable name to look up (e.g. ``'SEX'``).
        data_meta: Mapping of variable name -> metadata dict. A metadata dict
            may carry a ``'codes'`` entry that is a dict, ``None``, or absent.

    Returns:
        bool: ``False`` when the variable is unknown, has no ``'codes'`` key,
        or its ``'codes'`` value is ``None`` or empty; ``True`` otherwise.
    """
    # Unknown variables count as "no codes" rather than raising KeyError, the
    # same way id2name() tolerates variables that are missing from the metadata.
    entry = data_meta.get(var)
    if not entry:
        return False
    codes = entry.get('codes')
    return codes is not None and len(codes) > 0
10
+
11
@st.cache(show_spinner=False)
def load_data():
    """Load the census table, the variable metadata and the chart configuration.

    Reads the parquet file with pandas, ``data/meta.json`` with ``json`` and
    passes ``data/cons.json`` to ``get_variable_filter``.

    Returns:
        tuple: ``(data_main, data_meta, data_variable, data_filter, name2id)``.
        ``name2id`` maps each variable that has a non-empty ``codes`` entry to
        a ``{label: int(code)}`` dict, i.e. the inverse of the metadata codes.
        ``data_variable`` and ``data_filter`` are whatever
        ``get_variable_filter`` returns (presumably the "variables" and
        "filters" sections of cons.json -- confirm in utils.py).
    """
    with st.spinner('loading data...'):
        data_main = pd.read_parquet('data/ipums_full_count_nyc_census_coded_20210801.parquet')
        with open('data/meta.json', 'r') as meta_file:
            data_meta = json.load(meta_file)
        data_variable, data_filter = get_variable_filter('data/cons.json')
        # Invert code -> label into label -> integer code for coded variables only.
        label_to_code = {
            variable: {label: int(code) for code, label in entry['codes'].items()}
            for variable, entry in data_meta.items()
            if check_codes(variable, data_meta)
        }
    return data_main, data_meta, data_variable, data_filter, label_to_code
23
+
24
+ DATA_MAIN, DATA_META, DATA_VARIABLE, DATA_FILTER, NAME2ID = load_data()
25
+
26
def id2name(var, id_):
    """Translate a coded value of ``var`` into its human-readable label.

    Args:
        var: Variable name used as the key into ``DATA_META``.
        id_: Coded value; it is converted with ``str`` before the lookup
            because the metadata codes are keyed by strings.

    Returns:
        str: The label registered for the code, or ``str(id_)`` when the
        variable is unknown, has no usable ``codes`` mapping, or the code
        itself is not listed.
    """
    key = str(id_)
    # `codes` can be absent or explicitly null in the metadata (e.g. YEAR);
    # calling .get on None would raise AttributeError, so treat both as
    # "nothing to translate".
    codes = DATA_META.get(var, {}).get('codes')
    if not codes:
        return key
    return codes.get(key, key)
31
+
32
@st.cache
def name2id(var, name):
    """Translate a label of ``var`` back to its integer code.

    Variables without an entry in ``NAME2ID`` are passed through unchanged.
    A label that is missing from an existing entry raises ``KeyError``.
    """
    if var not in NAME2ID:
        return name
    label_to_code = NAME2ID[var]
    return label_to_code[name]
37
+
38
def main():
    """Render the portal: page title, chart-type selector and the chosen chart page."""
    # The dataset is already bound to the module-level DATA_* globals when the
    # module is imported, so the result of another load_data() call here would
    # simply be discarded; the redundant call has been removed.
    charts = {
        'Area Chart': area,
        'Line Graph': line,
        'Scatter Plot': scatter,
        'Bar Chart': bar,
        'Box Plot': box,
        'Heatmap': heat,
        'Histogram': hist,
    }
    st.title('HRL Portal')
    with st.sidebar:
        chart = st.selectbox('Select a chart type:', list(charts.keys()))
    # Dispatch to the page function registered for the selected chart type.
    charts[chart]()
56
+
57
@st.cache
def get_unique(var):
    """Return the selectable values of ``var``.

    Coded variables yield the labels from ``DATA_META[var]['codes']``; any
    other variable yields the distinct values of its ``DATA_MAIN`` column.
    """
    if not check_codes(var, DATA_META):
        return DATA_MAIN[var].unique().tolist()
    code_labels = DATA_META[var]['codes']
    return list(code_labels.values())
62
+
63
def get_var_name(var):
    """Build the display name of ``var``.

    Returns ``"VAR (Name)"`` when the metadata supplies a ``name`` that
    differs from the variable itself, and plain ``var`` otherwise.
    """
    entry = DATA_META[var] if var in DATA_META else {}
    if 'name' not in entry:
        return var
    name = entry['name']
    return var if name == var else f"{var} ({name})"
70
+
71
def name2var(name):
    """Return the first whitespace-delimited token of a display name.

    This recovers ``VAR`` from a ``"VAR (Name)"`` string produced by
    ``get_var_name``. An empty or whitespace-only string raises ``IndexError``.
    """
    # Only the first token is needed, so stop splitting after it.
    return name.split(maxsplit=1)[0]
73
+
74
def meta():
    """Render a "Variables" page listing every variable in ``DATA_META`` with its description."""
    st.header('Variables')
    for variable, entry in DATA_META.items():
        st.subheader(variable)
        # Same tolerant lookup as area(): an entry without a description
        # renders as empty text instead of raising KeyError.
        st.write(entry.get('description', ''))
79
+
80
def area():
    """Render the area-chart page.

    The sidebar offers the variable to plot plus one widget per filter listed
    for that variable in ``DATA_FILTER``. The main pane shows the variable's
    description, a value picker, the number of matching rows, and a stacked
    area chart of counts per YEAR.
    """
    with st.sidebar:
        var_name = st.selectbox('Select a variable:', [get_var_name(var) for var in DATA_VARIABLE['area']])
        var = name2var(var_name)
        st.write('Filters:')
        filters = {}
        for fvar in DATA_FILTER[var]:
            # YEAR is plotted on the x-axis below, so it is skipped as a filter.
            if fvar == 'YEAR':
                continue
            options = get_unique(fvar)
            if is_numerical(fvar):
                lowest, highest = min(options), max(options)
                low, high = st.slider(fvar, lowest, highest, value=(lowest, highest))
                # A range slider yields only its two endpoints. Keep every
                # observed value inside the selected range, otherwise the
                # filter would match rows equal to the endpoints only.
                chosen = [value for value in options if low <= value <= high]
            else:
                chosen = st.multiselect(fvar, options, default=options)
            filters[fvar] = {name2id(fvar, n) for n in chosen}

    st.header(f'Area Chart: {var_name}')
    with st.expander(f'{var}'):
        st.write(DATA_META.get(var, {}).get('description', ''))

    # Pre-selected values; the second list is used for every variable other
    # than OCC1950 (the configuration lists only OCC1950 and IND1950 here).
    if var == 'OCC1950':
        defaults = [
            'Authors',
            'Musicians and music teachers',
            'Telephone operators',
            'Bus drivers',
            'Cashiers',
        ]
    else:
        defaults = [
            'Drugs and medicines',
            'Fisheries',
            'Glass and glass products',
        ]
    names = st.multiselect('Select values to display:', get_unique(var), default=defaults)
    vals = {name2id(var, n) for n in names}
    df = get_area_data(var, filters, vals)
    st.write(f'found {len(df)} records')
    if len(df) > 0:
        # Clicking a legend entry highlights that series and dims the others.
        selection = alt.selection_multi(fields=[var], bind='legend')
        plot = (
            alt.Chart(df, title=f'Count of Different {var} Values')
            .mark_area()
            .encode(
                alt.X('YEAR'),
                alt.Y('count', title='count', stack='zero'),
                alt.Color(var,
                          scale=alt.Scale(scheme='category20'),
                          legend=alt.Legend(orient='bottom')),
                opacity=alt.condition(selection, alt.value(1), alt.value(0.3)),
                tooltip='count',
            )
            .properties(width=650)
            .add_selection(selection)
        )
        st.write(plot)
126
+
127
@st.cache(show_spinner=False)
def get_area_data(var, filters, vals):
    """Count rows per (``var``, YEAR) after applying the value and filter selections.

    Args:
        var: Column of ``DATA_MAIN`` to plot.
        filters: Mapping of filter column -> collection of accepted values.
        vals: Accepted values of ``var``.

    Returns:
        A frame with the columns ``var``, ``YEAR`` and ``count``, where the
        ``var`` column has been translated to labels through ``id2name``.
    """
    columns = ['YEAR', var, *filters.keys()]
    subset = DATA_MAIN[columns]
    subset = subset[subset[var].isin(vals)]
    with st.spinner('filtering...'):
        # Apply each filter in turn; a row must satisfy all of them.
        for column, allowed in filters.items():
            subset = subset[subset[column].isin(allowed)]
        subset = subset[['YEAR', var]]
    with st.spinner('counting...'):
        per_group = subset.groupby([var, 'YEAR'])[var].count()
        groups = per_group.reset_index(name='count')
        groups[var] = groups[var].apply(lambda code: id2name(var, code))
    return groups
146
+
147
def line():
    """Placeholder page for the line graph; only the header is rendered."""
    st.header('Line Graph')
149
+
150
def scatter():
    """Placeholder page for the scatter plot; only the header is rendered."""
    st.header('Scatter Plot')
152
+
153
def bar():
    """Placeholder page for the bar chart; only the header is rendered."""
    st.header('Bar Chart')
155
+
156
def box():
    """Placeholder page for the box plot; only the header is rendered."""
    st.header('Box Plot')
158
+
159
def heat():
    """Placeholder page for the heatmap; only the header is rendered."""
    st.header('Heatmap')
161
+
162
def hist():
    """Placeholder page for the histogram; only the header is rendered."""
    st.header('Histogram')
164
+
165
+ if __name__ == '__main__':
166
+ main()
167
+
data/cons.json ADDED
@@ -0,0 +1,283 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "variables": {
3
+ "bar": [
4
+ "SEX",
5
+ "RACE",
6
+ "HISPAN",
7
+ "AGE",
8
+ "MARST",
9
+ "FAMSIZE",
10
+ "BPL",
11
+ "MBPL",
12
+ "FBPL",
13
+ "NATIVITY",
14
+ "CITIZEN",
15
+ "SINCEIMMIG",
16
+ "AGEIMMIG",
17
+ "LIT",
18
+ "SCHOOL",
19
+ "OCC1950",
20
+ "IND1950",
21
+ "LABFORCE",
22
+ "EMPSTAT"
23
+ ],
24
+ "hist": [
25
+ "SINCEIMMIG",
26
+ "EDSCOR50",
27
+ "OCCSCORE",
28
+ "PRESGL",
29
+ "INCWAGE"
30
+ ],
31
+ "box": [
32
+ "AGE",
33
+ "RELATE",
34
+ "FAMSIZE",
35
+ "SINCEIMMIG",
36
+ "AGEIMMIG",
37
+ "EDSCOR50",
38
+ "OCCSCORE",
39
+ "PRESGL",
40
+ "INCWAGE"
41
+ ],
42
+ "scatter": [
43
+ "AGE",
44
+ "FAMSIZE",
45
+ "SINCEIMMIG",
46
+ "AGEIMMIG",
47
+ "EDSCOR50",
48
+ "OCCSCORE",
49
+ "PRESGL",
50
+ "INCWAGE"
51
+ ],
52
+ "line": [
53
+ "AGE",
54
+ "FAMSIZE",
55
+ "SINCEIMMIG",
56
+ "AGEIMMIG",
57
+ "EDSCOR50",
58
+ "OCCSCORE",
59
+ "PRESGL",
60
+ "INCWAGE"
61
+ ],
62
+ "line_count": [
63
+ "OCC1950",
64
+ "IND1950"
65
+ ],
66
+ "area": [
67
+ "OCC1950",
68
+ "IND1950"
69
+ ]
70
+ },
71
+ "filters": {
72
+ "YEAR": [],
73
+ "SERIAL": [],
74
+ "PERNUM": [],
75
+ "SEX": [
76
+ "YEAR",
77
+ "RACE",
78
+ "LIT",
79
+ "SCHOOL"
80
+ ],
81
+ "RACE": [
82
+ "YEAR",
83
+ "SEX",
84
+ "NATIVITY",
85
+ "CITIZEN",
86
+ "LIT",
87
+ "SCHOOL"
88
+ ],
89
+ "HISPAN": [
90
+ "YEAR",
91
+ "SEX",
92
+ "NATIVITY",
93
+ "CITIZEN",
94
+ "LIT",
95
+ "SCHOOL"
96
+ ],
97
+ "AGE": [
98
+ "YEAR",
99
+ "SEX",
100
+ "RACE"
101
+ ],
102
+ "RELATE": [],
103
+ "MARST": [
104
+ "YEAR",
105
+ "SEX",
106
+ "RACE",
107
+ "NATIVITY",
108
+ "CITIZEN"
109
+ ],
110
+ "FAMUNIT": [],
111
+ "FAMSIZE": [
112
+ "YEAR",
113
+ "RACE",
114
+ "NATIVITY",
115
+ "CITIZEN"
116
+ ],
117
+ "BPL": [
118
+ "YEAR",
119
+ "RACE"
120
+ ],
121
+ "MBPL": [
122
+ "YEAR",
123
+ "RACE"
124
+ ],
125
+ "FBPL": [
126
+ "YEAR",
127
+ "RACE"
128
+ ],
129
+ "NATIVITY": [
130
+ "YEAR",
131
+ "SEX",
132
+ "RACE",
133
+ "LIT",
134
+ "SCHOOL",
135
+ "OCC1950",
136
+ "IND1950"
137
+ ],
138
+ "CITIZEN": [
139
+ "YEAR",
140
+ "SEX",
141
+ "RACE",
142
+ "LIT",
143
+ "SCHOOL",
144
+ "OCC1950",
145
+ "IND1950"
146
+ ],
147
+ "SINCEIMMIG": [
148
+ "YEAR",
149
+ "SEX",
150
+ "RACE",
151
+ "NATIVITY",
152
+ "CITIZEN",
153
+ "LIT",
154
+ "SCHOOL"
155
+ ],
156
+ "AGEIMMIG": [
157
+ "YEAR",
158
+ "SEX",
159
+ "RACE",
160
+ "NATIVITY",
161
+ "CITIZEN"
162
+ ],
163
+ "LIT": [
164
+ "YEAR",
165
+ "SEX",
166
+ "RACE",
167
+ "MARST",
168
+ "NATIVITY",
169
+ "CITIZEN",
170
+ "SCHOOL",
171
+ "OCC1950",
172
+ "IND1950"
173
+ ],
174
+ "SCHOOL": [
175
+ "YEAR",
176
+ "SEX",
177
+ "RACE",
178
+ "MARST",
179
+ "NATIVITY",
180
+ "CITIZEN",
181
+ "LIT",
182
+ "OCC1950",
183
+ "IND1950"
184
+ ],
185
+ "EDSCOR50": [
186
+ "YEAR",
187
+ "SEX",
188
+ "RACE",
189
+ "NATIVITY",
190
+ "CITIZEN",
191
+ "LIT",
192
+ "SCHOOL",
193
+ "OCC1950",
194
+ "IND1950"
195
+ ],
196
+ "OCC1950": [
197
+ "YEAR",
198
+ "SEX",
199
+ "RACE",
200
+ "NATIVITY",
201
+ "CITIZEN",
202
+ "LIT",
203
+ "SCHOOL",
204
+ "LABFORCE",
205
+ "EMPSTAT"
206
+ ],
207
+ "OCCSCORE": [
208
+ "YEAR",
209
+ "SEX",
210
+ "RACE",
211
+ "NATIVITY",
212
+ "CITIZEN",
213
+ "LIT",
214
+ "SCHOOL",
215
+ "LABFORCE",
216
+ "EMPSTAT",
217
+ "OCC1950",
218
+ "IND1950"
219
+ ],
220
+ "PRESGL": [
221
+ "YEAR",
222
+ "SEX",
223
+ "RACE",
224
+ "NATIVITY",
225
+ "CITIZEN",
226
+ "LIT",
227
+ "SCHOOL",
228
+ "LABFORCE",
229
+ "EMPSTAT",
230
+ "OCC1950",
231
+ "IND1950"
232
+ ],
233
+ "IND1950": [
234
+ "YEAR",
235
+ "SEX",
236
+ "RACE",
237
+ "NATIVITY",
238
+ "CITIZEN",
239
+ "LIT",
240
+ "SCHOOL",
241
+ "LABFORCE",
242
+ "EMPSTAT"
243
+ ],
244
+ "LABFORCE": [
245
+ "YEAR",
246
+ "SEX",
247
+ "RACE",
248
+ "NATIVITY",
249
+ "CITIZEN",
250
+ "LIT",
251
+ "SCHOOL",
252
+ "LABFORCE",
253
+ "OCC1950",
254
+ "IND1950"
255
+ ],
256
+ "EMPSTAT": [
257
+ "YEAR",
258
+ "SEX",
259
+ "RACE",
260
+ "NATIVITY",
261
+ "CITIZEN",
262
+ "LIT",
263
+ "SCHOOL",
264
+ "EMPSTAT",
265
+ "OCC1950",
266
+ "IND1950"
267
+ ],
268
+ "INCWAGE": [
269
+ "YEAR",
270
+ "SEX",
271
+ "RACE",
272
+ "NATIVITY",
273
+ "CITIZEN",
274
+ "LIT",
275
+ "SCHOOL",
276
+ "LABFORCE",
277
+ "EMPSTAT",
278
+ "OCC1950",
279
+ "IND1950"
280
+ ]
281
+ }
282
+ }
283
+
data/meta.json ADDED
@@ -0,0 +1,1134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "YEAR": {
3
+ "description": "YEAR reports the four-digit year when the household was enumerated or included in the census, the ACS, and the PRCS. For the multi-year ACS/PRCS samples, YEAR indicates the last year of data included (e.g., 2007 for the 2005-2007 3-year ACS/PRCS; 2008 for the 2006-2008 3-year ACS/PRCS; and so on). For the actual year of survey in these multi-year data, see MULTYEAR.",
4
+ "codes": null
5
+ },
6
+ "SERIAL": {
7
+ "description": "SERIAL is an identifying number unique to each household record in a given sample. All person records are assigned the same serial number as the household record that they follow. (Person records also have their own unique identifiers - see PERNUM.) A combination of SAMPLE and SERIAL provides a unique identifier for every household in the IPUMS; the combination of SAMPLE, SERIAL, and PERNUM uniquely identifies every person in the database. For 1850-1930, households that are part of a multi-household dwelling can be identified by using the DWELLING and DWSEQ variables. See \"Sample Designs\" for further discussion of sampling from within multi-household dwellings.",
8
+ "codes": null
9
+ },
10
+ "PERNUM": {
11
+ "description": "PERNUM numbers all persons within each household consecutively in the order in which they appear on the original census or survey form. When combined with SAMPLE and SERIAL, PERNUM uniquely identifies each person within the IPUMS."
12
+ },
13
+ "SEX": {
14
+ "description": "SEX reports whether the person was male or female.",
15
+ "codes": { "1": "Male", "2": "Female" }
16
+ },
17
+ "RACE": {
18
+ "description": "With the exception of the 1970-1990 Puerto Rican censuses, RACE was asked of every person in all years. The concept of race has changed over the more than 150 years represented in the IPUMS. Currently, the Census Bureau and others consider race to be a sociopolitical construct, not a scientific or anthropological one. Many detailed RACE categories consist of national origin groups. Beginning in 2000, the race question changed substantially to allow respondents to report as many races as they felt necessary to describe themselves. In earlier years, only one race response was coded. IPUMS offers several variables describing the answer(s) to the race question. RACE provides the full detail given by the respondent and/or released by the Census Bureau; it is not always historically compatible (see comparability discussion below). Users primarily interested in historical compatibility should consider using RACESING, and should consult the race code relationship page, Relationship between RACE and RACESING codes, for detail about how the RACE and RACESING codes are related. In addition, specific combinations of major races can be discerned using the following bivariate indicators of whether a particular race group was reported: RACAMIND, RACASIAN, RACBLK, RACOTHER, RACPACIS, and RACWHT. RACNUM indicates the total number of major race groups reported for an individual. The information contained in the bivariate indicators and in RACNUM is integrated into the detailed version of RACE. Users primarily interested in historical comparability should consider using RACESING and/or the accompanying variables PROBAI, PROBAPI, PROBBLK, PROBOTH, and PROBWHT. Note that Hispanic origin is assessed through separate questioning (see HISPAN). Prior to 1960, the census enumerator was responsible for categorizing persons and was not specifically instructed to ask the individual his or her race. 
In 1970 and later years, an individual's race was reported by someone in the household or group quarters. In the 1990 U.S. census, the 2000 U.S. and Puerto Rican censuses, the ACS, and the PRCS respondents were specifically asked what race the person \"considers himself/herself\" to be, although such self-description was more or less operative since 1960. User Note: Race questions were not asked in the Puerto Rican censuses of 1970, 1980, and 1990. They were asked in the 1910 and 1920 Puerto Rican censuses, the 2000-2010 Puerto Rican censuses, and the PRCS.",
19
+ "codes": {
20
+ "1": "White",
21
+ "2": "Black/African American/Negro",
22
+ "3": "American Indian or Alaska Native",
23
+ "4": "Chinese",
24
+ "5": "Japanese",
25
+ "6": "Other Asian or Pacific Islander"
26
+ }
27
+ },
28
+ "HISPAN": {
29
+ "description": "HISPAN identifies persons of Hispanic/Spanish/Latino origin and classifies them according to their country of origin when possible. Origin is defined by the Census Bureau as ancestry, lineage, heritage, nationality group, or country of birth. People of Hispanic origin may be of any race; see RACE for a discussion of coding issues involved. Users should note that race questions were not asked in the Puerto Rican censuses of 1970, 1980 and 1990. They were asked in the 1910 and 1920 Puerto Rican censuses, and in the 2000 and 2010 Puerto Rican census and the PRCS. However, questions assessing Spanish/Hispanic origin were not asked in the Puerto Rican censuses prior to 2000. The HISPAN general code covers country-of-origin classifications common to all years; the detailed code distinguishes additional groups and subgroups. See HISPRULE for details on how country of origin information was assigned prior to 1980.",
30
+ "codes": {
31
+ "0": "Not Hispanic",
32
+ "1": "Mexican",
33
+ "2": "Puerto Rican",
34
+ "3": "Cuban",
35
+ "4": "Other",
36
+ "9": "Not Reported"
37
+ }
38
+ },
39
+ "AGE": {
40
+ "description": "AGE reports the person's age in years as of the last birthday. Please see the Comparability section regarding a known Universe issue with AGE and AGEORIG which affects EMPSTAT and LABFORCE for the 2004 ACS Sample.",
41
+ "codes": {
42
+ "0": "Less than 1 year old"
43
+ }
44
+ },
45
+ "MARST": {
46
+ "name": "Marital Status",
47
+ "description": "MARST gives each person's current marital status.",
48
+ "codes": {}
49
+ },
50
+ "FAMUNIT": {
51
+ "description": "FAMUNIT indicates to which family within the housing unit each person belongs. If there is only one group of related individuals, all of them will be coded 1; if there is a second, separate such group, all members of that family group will be coded 2, and so on. All persons with a RELATE code less than 1100 are included in FAMUNIT, coded as 1. It is possible for an individual with a RELATE code larger than 1100 to be included in the \"primary family\" if they are identified as a child or spouse of a primary family member using SPLOC, MOMLOC, or POPLOC. The Census Bureau defines \"primary families\" as groups of persons related to the head of household, and \"primary individuals\" as household heads/householders residing without kin. In the IPUMS, primary families and primary individuals are identified in FAMUNIT with a code of 1; each secondary family or secondary individual receives a higher code. FAMUNIT is not analogous to the Census Bureau concept of \"subfamily.\" People in \"subfamilies\" are necessarily related to the householder, and they will be included in FAMUNIT, coded as 1.",
52
+ "codes": {
53
+ "1": "1st family in household or group quarters",
54
+ "2": "2nd family in household or group quarters",
55
+ "3": "3rd",
56
+ "4": "4th",
57
+ "5": "5th",
58
+ "6": "6th",
59
+ "7": "7th",
60
+ "8": "8th",
61
+ "9": "9th",
62
+ "10": "10th",
63
+ "11": "11th",
64
+ "12": "12th",
65
+ "13": "13th",
66
+ "14": "14th",
67
+ "15": "15th",
68
+ "16": "16th",
69
+ "17": "17th",
70
+ "18": "18th",
71
+ "19": "19th",
72
+ "20": "20th",
73
+ "21": "21st",
74
+ "22": "22nd",
75
+ "23": "23rd",
76
+ "24": "24th",
77
+ "25": "25th",
78
+ "26": "26th",
79
+ "27": "27th",
80
+ "28": "28th",
81
+ "29": "29th",
82
+ "30": "30th"
83
+ }
84
+ },
85
+ "FAMSIZE": {
86
+ "description": "FAMSIZE counts the number of own family members residing with each individual, including the person her/himself. Persons not living with others related to them by blood, marriage/cohabitating partnership, or adoption are coded 1."
87
+ },
88
+ "BPL": {
89
+ "name": "Birthplace",
90
+ "description": "BPL indicates the U.S. state, the outlying U.S. area or territory, or the foreign country where the person was born.",
91
+ "codes": {
92
+ "1": "Alabama",
93
+ "2": "Alaska",
94
+ "4": "Arizona",
95
+ "5": "Arkansas",
96
+ "6": "California",
97
+ "8": "Colorado",
98
+ "9": "Connecticut",
99
+ "10": "Delaware",
100
+ "11": "District of Columbia",
101
+ "12": "Florida",
102
+ "13": "Georgia",
103
+ "15": "Hawaii",
104
+ "16": "Idaho",
105
+ "17": "Illinois",
106
+ "18": "Indiana",
107
+ "19": "Iowa",
108
+ "20": "Kansas",
109
+ "21": "Kentucky",
110
+ "22": "Louisiana",
111
+ "23": "Maine",
112
+ "24": "Maryland",
113
+ "25": "Massachusetts",
114
+ "26": "Michigan",
115
+ "27": "Minnesota",
116
+ "28": "Mississippi",
117
+ "29": "Missouri",
118
+ "30": "Montana",
119
+ "31": "Nebraska",
120
+ "32": "Nevada",
121
+ "33": "New Hampshire",
122
+ "34": "New Jersey",
123
+ "35": "New Mexico",
124
+ "36": "New York",
125
+ "37": "North Carolina",
126
+ "38": "North Dakota",
127
+ "39": "Ohio",
128
+ "40": "Oklahoma",
129
+ "41": "Oregon",
130
+ "42": "Pennsylvania",
131
+ "44": "Rhode Island",
132
+ "45": "South Carolina",
133
+ "46": "South Dakota",
134
+ "47": "Tennessee",
135
+ "48": "Texas",
136
+ "49": "Utah",
137
+ "50": "Vermont",
138
+ "51": "Virginia",
139
+ "53": "Washington",
140
+ "54": "West Virginia",
141
+ "55": "Wisconsin",
142
+ "56": "Wyoming",
143
+ "90": "Native American",
144
+ "99": "United States, ns",
145
+ "100": "American Samoa",
146
+ "105": "Guam",
147
+ "110": "Puerto Rico",
148
+ "115": "U.S. Virgin Islands",
149
+ "120": "Other US Possessions",
150
+ "150": "Canada",
151
+ "155": "St. Pierre and Miquelon",
152
+ "160": "Atlantic Islands",
153
+ "199": "North America, ns",
154
+ "200": "Mexico",
155
+ "210": "Central America",
156
+ "250": "Cuba",
157
+ "260": "West Indies",
158
+ "299": "Americas, n.s.",
159
+ "300": "SOUTH AMERICA",
160
+ "400": "Denmark",
161
+ "401": "Finland",
162
+ "402": "Iceland",
163
+ "403": "Lapland, n.s.",
164
+ "404": "Norway",
165
+ "405": "Sweden",
166
+ "410": "England",
167
+ "411": "Scotland",
168
+ "412": "Wales",
169
+ "413": "United Kingdom, ns",
170
+ "414": "Ireland",
171
+ "419": "Northern Europe, ns",
172
+ "420": "Belgium",
173
+ "421": "France",
174
+ "422": "Liechtenstein",
175
+ "423": "Luxembourg",
176
+ "424": "Monaco",
177
+ "425": "Netherlands",
178
+ "426": "Switzerland",
179
+ "429": "Western Europe, ns",
180
+ "430": "Albania",
181
+ "431": "Andorra",
182
+ "432": "Gibraltar",
183
+ "433": "Greece",
184
+ "434": "Italy",
185
+ "435": "Malta",
186
+ "436": "Portugal",
187
+ "437": "San Marino",
188
+ "438": "Spain",
189
+ "439": "Vatican City",
190
+ "440": "Southern Europe, ns",
191
+ "450": "Austria",
192
+ "451": "Bulgaria",
193
+ "452": "Czechoslovakia",
194
+ "453": "Germany",
195
+ "454": "Hungary",
196
+ "455": "Poland",
197
+ "456": "Romania",
198
+ "457": "Yugoslavia",
199
+ "458": "Central Europe, ns",
200
+ "459": "Eastern Europe, ns",
201
+ "460": "Estonia",
202
+ "461": "Latvia",
203
+ "462": "Lithuania",
204
+ "463": "Baltic States, ns",
205
+ "465": "Other USSR/Russia",
206
+ "499": "Europe, ns",
207
+ "500": "China",
208
+ "501": "Japan",
209
+ "502": "Korea",
210
+ "509": "East Asia, ns",
211
+ "510": "Brunei",
212
+ "511": "Cambodia (Kampuchea)",
213
+ "512": "Indonesia",
214
+ "513": "Laos",
215
+ "514": "Malaysia",
216
+ "515": "Philippines",
217
+ "516": "Singapore",
218
+ "517": "Thailand",
219
+ "518": "Vietnam",
220
+ "519": "Southeast Asia, ns",
221
+ "520": "Afghanistan",
222
+ "521": "India",
223
+ "522": "Iran",
224
+ "523": "Maldives",
225
+ "524": "Nepal",
226
+ "530": "Bahrain",
227
+ "531": "Cyprus",
228
+ "532": "Iraq",
229
+ "533": "Iraq/Saudi Arabia",
230
+ "534": "Israel/Palestine",
231
+ "535": "Jordan",
232
+ "536": "Kuwait",
233
+ "537": "Lebanon",
234
+ "538": "Oman",
235
+ "539": "Qatar",
236
+ "540": "Saudi Arabia",
237
+ "541": "Syria",
238
+ "542": "Turkey",
239
+ "543": "United Arab Emirates",
240
+ "544": "Yemen Arab Republic (North)",
241
+ "545": "Yemen, PDR (South)",
242
+ "546": "Persian Gulf States, n.s.",
243
+ "547": "Middle East, ns",
244
+ "548": "Southwest Asia, nec/ns",
245
+ "549": "Asia Minor, ns",
246
+ "550": "South Asia, nec",
247
+ "599": "Asia, nec/ns",
248
+ "600": "AFRICA",
249
+ "700": "Australia and New Zealand",
250
+ "710": "Pacific Islands",
251
+ "800": "Antarctica, ns/nec",
252
+ "900": "Abroad (unknown) or at sea",
253
+ "950": "Other n.e.c.",
254
+ "999": "Missing/blank"
255
+ }
256
+ },
257
+ "MBPL": {
258
+ "name": "Mother's Birthplace",
259
+ "description": "MBPL reports the state, territory, or foreign country where the respondent's mother was born. The codes for MBPL are the same as for BPL (Birthplace). As with BPL, MBPL has a general code distinguishing places available in multiple years and a detailed code noting places unique to certain years or indicating areas that are strictly subsets of other countries.",
260
+ "codes": {
261
+ "0": "Not Applicable",
262
+ "1": "Alabama",
263
+ "2": "Alaska",
264
+ "4": "Arizona",
265
+ "5": "Arkansas",
266
+ "6": "California",
267
+ "8": "Colorado",
268
+ "9": "Connecticut",
269
+ "10": "Delaware",
270
+ "11": "District of Columbia",
271
+ "12": "Florida",
272
+ "13": "Georgia",
273
+ "15": "Hawaii",
274
+ "16": "Idaho",
275
+ "17": "Illinois",
276
+ "18": "Indiana",
277
+ "19": "Iowa",
278
+ "20": "Kansas",
279
+ "21": "Kentucky",
280
+ "22": "Louisiana",
281
+ "23": "Maine",
282
+ "24": "Maryland",
283
+ "25": "Massachusetts",
284
+ "26": "Michigan",
285
+ "27": "Minnesota",
286
+ "28": "Mississippi",
287
+ "29": "Missouri",
288
+ "30": "Montana",
289
+ "31": "Nebraska",
290
+ "32": "Nevada",
291
+ "33": "New Hampshire",
292
+ "34": "New Jersey",
293
+ "35": "New Mexico",
294
+ "36": "New York",
295
+ "37": "North Carolina",
296
+ "38": "North Dakota",
297
+ "39": "Ohio",
298
+ "40": "Oklahoma",
299
+ "41": "Oregon",
300
+ "42": "Pennsylvania",
301
+ "44": "Rhode Island",
302
+ "45": "South Carolina",
303
+ "46": "South Dakota",
304
+ "47": "Tennessee",
305
+ "48": "Texas",
306
+ "49": "Utah",
307
+ "50": "Vermont",
308
+ "51": "Virginia",
309
+ "53": "Washington",
310
+ "54": "West Virginia",
311
+ "55": "Wisconsin",
312
+ "56": "Wyoming",
313
+ "90": "Native American",
314
+ "99": "United States, ns",
315
+ "100": "American Samoa",
316
+ "105": "Guam",
317
+ "110": "Puerto Rico",
318
+ "115": "U.S. Virgin Islands",
319
+ "120": "Other US Possessions",
320
+ "150": "Canada",
321
+ "155": "St. Pierre and Miquelon",
322
+ "160": "Atlantic Islands",
323
+ "199": "North America, n.s.",
324
+ "200": "Mexico",
325
+ "210": "Central America",
326
+ "250": "Cuba",
327
+ "260": "West Indies",
328
+ "299": "Americas, n.s.",
329
+ "300": "SOUTH AMERICA",
330
+ "400": "Denmark",
331
+ "401": "Finland",
332
+ "402": "Iceland",
333
+ "403": "Lapland, n.s.",
334
+ "404": "Norway",
335
+ "405": "Sweden",
336
+ "410": "England",
337
+ "411": "Scotland",
338
+ "412": "Wales",
339
+ "413": "United Kingdom, ns",
340
+ "414": "Ireland",
341
+ "419": "Northern Europe, ns",
342
+ "420": "Belgium",
343
+ "421": "France",
344
+ "422": "Liechtenstein",
345
+ "423": "Luxembourg",
346
+ "424": "Monaco",
347
+ "425": "Netherlands",
348
+ "426": "Switzerland",
349
+ "429": "Western Europe, ns",
350
+ "430": "Albania",
351
+ "431": "Andorra",
352
+ "432": "Gibraltar",
353
+ "433": "Greece",
354
+ "434": "Italy",
355
+ "435": "Malta",
356
+ "436": "Portugal",
357
+ "437": "San Marino",
358
+ "438": "Spain",
359
+ "439": "Vatican City",
360
+ "440": "Southern Europe, n.s.",
361
+ "450": "Austria",
362
+ "451": "Bulgaria",
363
+ "452": "Czechoslovakia",
364
+ "453": "Germany",
365
+ "454": "Hungary",
366
+ "455": "Poland",
367
+ "456": "Romania",
368
+ "457": "Yugoslavia",
369
+ "458": "Central Europe, ns",
370
+ "459": "Eastern Europe, n.s.",
371
+ "460": "Estonia",
372
+ "461": "Latvia",
373
+ "462": "Lithuania",
374
+ "463": "Baltic States, ns",
375
+ "465": "Other USSR/Russia",
376
+ "499": "Europe, nec/ns",
377
+ "500": "China",
378
+ "501": "Japan",
379
+ "502": "Korea",
380
+ "509": "East Asia, n.s.",
381
+ "510": "Brunei",
382
+ "511": "Cambodia (Kampuchea)",
383
+ "512": "Indonesia",
384
+ "513": "Laos",
385
+ "514": "Malaysia",
386
+ "515": "Philippines",
387
+ "516": "Singapore",
388
+ "517": "Thailand",
389
+ "518": "Vietnam",
390
+ "519": "Southeast Asia, ns",
391
+ "520": "Afghanistan",
392
+ "521": "India",
393
+ "522": "Iran",
394
+ "523": "Maldives",
395
+ "524": "Nepal",
396
+ "530": "Bahrain",
397
+ "531": "Cyprus",
398
+ "532": "Iraq",
399
+ "533": "Iraq/Saudi Arabia",
400
+ "534": "Israel/Palestine",
401
+ "535": "Jordan",
402
+ "536": "Kuwait",
403
+ "537": "Lebanon",
404
+ "538": "Oman",
405
+ "539": "Qatar",
406
+ "540": "Saudi Arabia",
407
+ "541": "Syria",
408
+ "542": "Turkey",
409
+ "543": "United Arab Emirates",
410
+ "544": "Yemen Arab Republic (North)",
411
+ "545": "Yemen, PDR (South)",
412
+ "546": "Persian Gulf States, n.s.",
413
+ "547": "Middle East, n.s.",
414
+ "548": "Southwest Asia, nec/ns",
415
+ "549": "Asia Minor, n.s.",
416
+ "550": "South Asia, n.e.c.",
417
+ "599": "Asia, nec/ns",
418
+ "600": "AFRICA",
419
+ "700": "Australia and New Zealand",
420
+ "710": "Pacific Islands",
421
+ "900": "Abroad (unknown) or at sea",
422
+ "950": "Other n.e.c.",
423
+ "997": "Unknown",
424
+ "999": "Missing/blank"
425
+ }
426
+ },
427
+ "FBPL": {
428
+ "name": "Father's Birthplace",
429
+ "description": "FBPL reports the U.S. state, the outlying U.S. area or territory, or the foreign country where the respondent's father was born. The codes for FBPL are the same as for BPL (Birthplace). As with BPL, FBPL has a general code distinguishing places available in multiple years and a detailed code noting places unique to certain years or indicating areas that are strictly subsets of other countries.",
430
+ "codes": {
431
+ "0": "Not Applicable",
432
+ "1": "Alabama",
433
+ "2": "Alaska",
434
+ "4": "Arizona",
435
+ "5": "Arkansas",
436
+ "6": "California",
437
+ "8": "Colorado",
438
+ "9": "Connecticut",
439
+ "10": "Delaware",
440
+ "11": "District of Columbia",
441
+ "12": "Florida",
442
+ "13": "Georgia",
443
+ "15": "Hawaii",
444
+ "16": "Idaho",
445
+ "17": "Illinois",
446
+ "18": "Indiana",
447
+ "19": "Iowa",
448
+ "20": "Kansas",
449
+ "21": "Kentucky",
450
+ "22": "Louisiana",
451
+ "23": "Maine",
452
+ "24": "Maryland",
453
+ "25": "Massachusetts",
454
+ "26": "Michigan",
455
+ "27": "Minnesota",
456
+ "28": "Mississippi",
457
+ "29": "Missouri",
458
+ "30": "Montana",
459
+ "31": "Nebraska",
460
+ "32": "Nevada",
461
+ "33": "New Hampshire",
462
+ "34": "New Jersey",
463
+ "35": "New Mexico",
464
+ "36": "New York",
465
+ "37": "North Carolina",
466
+ "38": "North Dakota",
467
+ "39": "Ohio",
468
+ "40": "Oklahoma",
469
+ "41": "Oregon",
470
+ "42": "Pennsylvania",
471
+ "44": "Rhode Island",
472
+ "45": "South Carolina",
473
+ "46": "South Dakota",
474
+ "47": "Tennessee",
475
+ "48": "Texas",
476
+ "49": "Utah",
477
+ "50": "Vermont",
478
+ "51": "Virginia",
479
+ "53": "Washington",
480
+ "54": "West Virginia",
481
+ "55": "Wisconsin",
482
+ "56": "Wyoming",
483
+ "90": "Native American",
484
+ "99": "United States, ns",
485
+ "100": "American Samoa",
486
+ "105": "Guam",
487
+ "110": "Puerto Rico",
488
+ "115": "US Virgin Islands",
489
+ "120": "Other US Possessions",
490
+ "150": "Canada",
491
+ "155": "St Pierre and Miquelon",
492
+ "160": "Atlantic Islands",
493
+ "199": "North America, n.s.",
494
+ "200": "Mexico",
495
+ "210": "Central America",
496
+ "250": "Cuba",
497
+ "260": "West Indies",
498
+ "299": "Americas, n.s.",
499
+ "300": "SOUTH AMERICA",
500
+ "400": "Denmark",
501
+ "401": "Finland",
502
+ "402": "Iceland",
503
+ "403": "Lapland, n.s.",
504
+ "404": "Norway",
505
+ "405": "Sweden",
506
+ "406": "Svalbard",
507
+ "410": "England",
508
+ "411": "Scotland",
509
+ "412": "Wales",
510
+ "413": "United Kingdom, ns",
511
+ "414": "Ireland",
512
+ "419": "Northern Europe, ns",
513
+ "420": "Belgium",
514
+ "421": "France",
515
+ "422": "Liechtenstein",
516
+ "423": "Luxembourg",
517
+ "424": "Monaco",
518
+ "425": "Netherlands",
519
+ "426": "Switzerland",
520
+ "429": "Western Europe, ns",
521
+ "430": "Albania",
522
+ "431": "Andorra",
523
+ "432": "Gibraltar",
524
+ "433": "Greece",
525
+ "434": "Italy",
526
+ "435": "Malta",
527
+ "436": "Portugal",
528
+ "437": "San Marino",
529
+ "438": "Spain",
530
+ "439": "Vatican City",
531
+ "440": "Southern Europe, n.s.",
532
+ "450": "Austria",
533
+ "451": "Bulgaria",
534
+ "452": "Czechoslovakia",
535
+ "453": "Germany",
536
+ "454": "Hungary",
537
+ "455": "Poland",
538
+ "456": "Romania",
539
+ "457": "Yugoslavia",
540
+ "458": "Central Europe, ns",
541
+ "459": "Eastern Europe, ns",
542
+ "460": "Estonia",
543
+ "461": "Latvia",
544
+ "462": "Lithuania",
545
+ "463": "Baltic States, ns",
546
+ "465": "Other USSR/Russia",
547
+ "499": "Europe, nec/ns",
548
+ "500": "China",
549
+ "501": "Japan",
550
+ "502": "Korea",
551
+ "510": "Brunei",
552
+ "511": "Cambodia (Kampuchea)",
553
+ "512": "Indonesia",
554
+ "513": "Laos",
555
+ "514": "Malaysia",
556
+ "515": "Philippines",
557
+ "516": "Singapore",
558
+ "517": "Thailand",
559
+ "518": "Vietnam",
560
+ "519": "Southeast Asia, ns",
561
+ "520": "Afghanistan",
562
+ "521": "India",
563
+ "522": "Iran",
564
+ "523": "Maldives",
565
+ "524": "Nepal",
566
+ "530": "Bahrain",
567
+ "531": "Cyprus",
568
+ "532": "Iraq",
569
+ "533": "Iraq/Saudi Arabia",
570
+ "534": "Israel/Palestine",
571
+ "535": "Jordan",
572
+ "536": "Kuwait",
573
+ "537": "Lebanon",
574
+ "538": "Oman",
575
+ "539": "Qatar",
576
+ "540": "Saudi Arabia",
577
+ "541": "Syria",
578
+ "542": "Turkey",
579
+ "543": "United Arab Emirates",
580
+ "544": "Yemen Arab Republic (North)",
581
+ "545": "Yemen, PDR (South)",
582
+ "546": "Persian Gulf States, n.s.",
583
+ "547": "Middle East, ns",
584
+ "548": "Southwest Asia, nec/ns",
585
+ "549": "Asia Minor, n.s.",
586
+ "550": "South Asia, n.e.c.",
587
+ "599": "Asia, nec/ns",
588
+ "600": "AFRICA",
589
+ "700": "Australia and New Zealand",
590
+ "710": "Pacific Islands",
591
+ "900": "Abroad (unknown) or at sea",
592
+ "950": "Other n.e.c.",
593
+ "997": "Unknown",
594
+ "998": "Illegible",
595
+ "999": "Missing/blank"
596
+ }
597
+ },
598
+ "NATIVITY": {
599
+ "name": "Nativity",
600
+ "description": "NATIVITY indicates whether respondents were native-born or foreign-born; for native-born respondents, it indicates whether their mothers and/or fathers were native-born or foreign-born. NATIVITY is constructed from the IPUMS variables BPL, MBPL, and FBPL. Those U.S. possessions and territories classified as \"U.S. outlying areas\" in BPL are considered foreign. For a similar variable that identifies those who are foreign or native born in 1970 for Puerto Rico, see NATIVPR.",
601
+ "codes": {
602
+ "-1": "N/A",
603
+ "1": "Both parents native-born",
604
+ "2": "Father foreign, mother native",
605
+ "3": "Mother foreign, father native",
606
+ "4": "Both parents foreign",
607
+ "5": "Foreign-Born"
608
+ }
609
+ },
610
+ "CITIZEN": {
611
+ "name": "Citizenship",
612
+ "description": "CITIZEN reports the citizenship status of respondents, distinguishing between naturalized citizens and non-citizens. For 1900-1940, respondents who were not yet citizens but who had begun the naturalization process (\"received first papers\") are identified.",
613
+ "codes": {
614
+ "-1": "N/A",
615
+ "1": "Born abroad of American parents",
616
+ "2": "Naturalized citizen",
617
+ "3": "Not a citizen",
618
+ "4": "Not a citizen, but has received first papers",
619
+ "5": "Foreign born, citizenship status not reported"
620
+ }
621
+ },
622
+ "YRIMMIG": {
623
+ "name": "Year of Immigration",
624
+ "description": "YRIMMIG reports the year in which a foreign-born person entered the United States (or Puerto Rico, for the 1910 and 1920 Puerto Rico samples). For the 1900-1930 samples and the 2000-2004 ACS, YRIMMIG reports the exact year of immigration. For 1970-1990, the respondent was asked to report the range of years that included their year of arrival. For the 2000 census and the ACS from 2005 onward, exact years are reported back to 1935; some years prior to 1935 are collapsed into categories (see the codes page for details). The codes for all such categories represent the latest possible year in which a respondent could have immigrated. Other immigration variables are available. Year immigrated: YRIMMIG (USA), YRIMMIPR (Puerto Rico). Number of years living in area (continuous; limited availability): YRSUSA1 (USA), YRSPR (Puerto Rico). Number of years living in area (intervalled; wide availability): YRSUSA2 (USA), YRSPR2 (Puerto Rico).",
625
+ "codes": {}
626
+ },
627
+ "LIT": {
628
+ "name": "Literacy",
629
+ "description": "LIT indicates whether the respondent could read and/or write in any language.",
630
+ "codes": {
631
+ "-1": "N/A",
632
+ "1": "No, illiterate (cannot read nor write)",
633
+ "2": "Can't read, can write",
634
+ "3": "Can't write, can read",
635
+ "4": "Yes, literate (reads and writes)"
636
+ }
637
+ },
638
+ "SCHOOL": {
639
+ "name": "School",
640
+ "description": "SCHOOL indicates whether the respondent attended school during a specified period.",
641
+ "codes": {}
642
+ },
643
+ "EDSCOR50": {
644
+ "name": "Occupational Education Score",
645
+ "description": "EDSCOR50 is a constructed variable indicating the percentage of people in the respondent's occupational category who had completed one or more years of college. This variable relies on the modified version of the 1950 occupational classification scheme available in the OCC1950 variable. EDSCOR50 is derived by using educational attainment information (available in EDUC) for the employed civilian labor force aged 16 and up, excluding persons who did not work in the past year. For years prior to 1950, occupations are assigned the EDSCOR50 value derived from the 1950 data. The OCC1950 categories 12-29 (professors and instructors) were treated as one single category in the process of deriving EDSCOR50, since there were not enough cases to be kept as separate categories. All these occupational categories have the same value of EDSCOR50. Alternative measures of occupational standing that are based on OCC1950 are available in ERSCOR50, NPBOSS50, OCCSCORE, PRESGL, and SEI. For information on the construction of OCC1950 and occupational standing measures, see \"Integrated Occupation and Industry Codes and Occupational Standing Variables in the IPUMS.\" User Note: There is significant debate about the usefulness of composite measures of occupational standing (in the IPUMS, these variables include SEI, HWSEI, NPBOSS50, and NPBOSS90). We strongly urge researchers to read our user note on this issue and to familiarize themselves with the debates surrounding the use of these variables.",
646
+ "codes": {}
647
+ },
648
+ "OCC1950": {
649
+ "name": "Occupation",
650
+ "description": "OCC1950 applies the 1950 Census Bureau occupational classification system to occupational data, to enhance comparability across years. For pre-1940 samples created at Minnesota, the alphabetic responses supplied by enumerators were directly coded into the 1950 classification. For other samples, the information in the variable OCC was recoded into the 1950 classification. Codes above 970 are non-occupational responses retained in the historical census samples or blank/unknown. The design of OCC1950 is described at length in \" Integrated Occupation and Industry Codes and Occupational Standing Variables in the IPUMS.\". The composition of the 1950 occupation categories is described in detail in U.S. Bureau of the Census, Alphabetic Index of Occupations and Industries: 1950 (Washington D.C., 1950).\n\nIn 1850-1880, any laborer with no specified industry in a household with a farmer is recoded into farm labor. In 1860-1900, any woman with an occupational response of \"housekeeper\" enters the non-occupational category \"keeping house\" if she is related to the head of household. Cases affected by these imputation procedures are identified by an appropriate data quality flag.\n\nA parallel variable called OCC1990, available for the samples from 1950 onward, codes occupations into a simplified version of the 1990 occupational coding scheme.",
651
+ "codes": {
652
+ "0": "Accountants and auditors",
653
+ "1": "Actors and actresses",
654
+ "2": "Airplane pilots and navigators",
655
+ "3": "Architects",
656
+ "4": "Artists and art teachers",
657
+ "5": "Athletes",
658
+ "6": "Authors",
659
+ "7": "Chemists",
660
+ "8": "Chiropractors",
661
+ "9": "Clergymen",
662
+ "10": "College presidents and deans",
663
+ "12": "Agricultural sciences",
664
+ "13": "Biological sciences",
665
+ "14": "Chemistry",
666
+ "15": "Economics",
667
+ "16": "Engineering",
668
+ "17": "Geology and geophysics",
669
+ "18": "Mathematics",
670
+ "19": "Medical sciences",
671
+ "23": "Physics",
672
+ "24": "Psychology",
673
+ "25": "Statistics",
674
+ "26": "Natural science (n.e.c.)",
675
+ "27": "Social sciences (n.e.c.)",
676
+ "28": "Non-scientific subjects",
677
+ "29": "Subject not specified",
678
+ "31": "Dancers and dancing teachers",
679
+ "32": "Dentists",
680
+ "33": "Designers",
681
+ "34": "Dietitians and nutritionists",
682
+ "35": "Draftsmen",
683
+ "36": "Editors and reporters",
684
+ "41": "Engineers, aeronautical",
685
+ "42": "Engineers, chemical",
686
+ "43": "Engineers, civil",
687
+ "44": "Engineers, electrical",
688
+ "45": "Engineers, industrial",
689
+ "46": "Engineers, mechanical",
690
+ "47": "Engineers, metallurgical, metallurgists",
691
+ "48": "Engineers, mining",
692
+ "49": "Engineers (n.e.c.)",
693
+ "51": "Entertainers (n.e.c.)",
694
+ "52": "Farm and home management advisors",
695
+ "53": "Foresters and conservationists",
696
+ "54": "Funeral directors and embalmers",
697
+ "55": "Lawyers and judges",
698
+ "56": "Librarians",
699
+ "57": "Musicians and music teachers",
700
+ "58": "Nurses, professional",
701
+ "59": "Nurses, student professional",
702
+ "61": "Agricultural scientists",
703
+ "62": "Biological scientists",
704
+ "63": "Geologists and geophysicists",
705
+ "67": "Mathematicians",
706
+ "68": "Physicists",
707
+ "69": "Miscellaneous natural scientists",
708
+ "70": "Optometrists",
709
+ "71": "Osteopaths",
710
+ "72": "Personnel and labor relations workers",
711
+ "73": "Pharmacists",
712
+ "74": "Photographers",
713
+ "75": "Physicians and surgeons",
714
+ "76": "Radio operators",
715
+ "77": "Recreation and group workers",
716
+ "78": "Religious workers",
717
+ "79": "Social and welfare workers, except group",
718
+ "81": "Economists",
719
+ "82": "Psychologists",
720
+ "83": "Statisticians and actuaries",
721
+ "84": "Miscellaneous social scientists",
722
+ "91": "Sports instructors and officials",
723
+ "92": "Surveyors",
724
+ "93": "Teachers (n.e.c.)",
725
+ "94": "Technicians, medical and dental",
726
+ "95": "Technicians, testing",
727
+ "96": "Technicians (n.e.c.)",
728
+ "97": "Therapists and healers (n.e.c.)",
729
+ "98": "Veterinarians",
730
+ "99": "Professional, technical and kindred workers (n.e.c.)",
731
+ "100": "Farmers (owners and tenants)",
732
+ "123": "Farm managers",
733
+ "200": "Buyers and department heads, store",
734
+ "201": "Buyers and shippers, farm products",
735
+ "203": "Conductors, railroad",
736
+ "204": "Credit men",
737
+ "205": "Floormen and floor managers, store",
738
+ "210": "Inspectors, public administration",
739
+ "230": "Managers and superintendents, building",
740
+ "240": "Officers, pilots, pursers and engineers, ship",
741
+ "250": "Officials and administrators (n.e.c.), public administration",
742
+ "260": "Officials, lodge, society, union, etc.",
743
+ "270": "Postmasters",
744
+ "280": "Purchasing agents and buyers (n.e.c.)",
745
+ "290": "Managers, officials, and proprietors (n.e.c.)",
746
+ "300": "Agents (n.e.c.)",
747
+ "301": "Attendants and assistants, library",
748
+ "302": "Attendants, physician's and dentist's office",
749
+ "304": "Baggagemen, transportation",
750
+ "305": "Bank tellers",
751
+ "310": "Bookkeepers",
752
+ "320": "Cashiers",
753
+ "321": "Collectors, bill and account",
754
+ "322": "Dispatchers and starters, vehicle",
755
+ "325": "Express messengers and railway mail clerks",
756
+ "335": "Mail carriers",
757
+ "340": "Messengers and office boys",
758
+ "341": "Office machine operators",
759
+ "342": "Shipping and receiving clerks",
760
+ "350": "Stenographers, typists, and secretaries",
761
+ "360": "Telegraph messengers",
762
+ "365": "Telegraph operators",
763
+ "370": "Telephone operators",
764
+ "380": "Ticket, station, and express agents",
765
+ "390": "Clerical and kindred workers (n.e.c.)",
766
+ "400": "Advertising agents and salesmen",
767
+ "410": "Auctioneers",
768
+ "420": "Demonstrators",
769
+ "430": "Hucksters and peddlers",
770
+ "450": "Insurance agents and brokers",
771
+ "460": "Newsboys",
772
+ "470": "Real estate agents and brokers",
773
+ "480": "Stock and bond salesmen",
774
+ "490": "Salesmen and sales clerks (n.e.c.)",
775
+ "500": "Bakers",
776
+ "501": "Blacksmiths",
777
+ "502": "Bookbinders",
778
+ "503": "Boilermakers",
779
+ "504": "Brickmasons, stonemasons, and tile setters",
780
+ "505": "Cabinetmakers",
781
+ "510": "Carpenters",
782
+ "511": "Cement and concrete finishers",
783
+ "512": "Compositors and typesetters",
784
+ "513": "Cranemen, derrickmen, and hoistmen",
785
+ "514": "Decorators and window dressers",
786
+ "515": "Electricians",
787
+ "520": "Electrotypers and stereotypers",
788
+ "521": "Engravers, except photoengravers",
789
+ "522": "Excavating, grading, and road machinery operators",
790
+ "523": "Foremen (n.e.c.)",
791
+ "524": "Forgemen and hammermen",
792
+ "525": "Furriers",
793
+ "530": "Glaziers",
794
+ "531": "Heat treaters, annealers, temperers",
795
+ "532": "Inspectors, scalers, and graders, log and lumber",
796
+ "533": "Inspectors (n.e.c.)",
797
+ "534": "Jewelers, watchmakers, goldsmiths, and silversmiths",
798
+ "535": "Job setters, metal",
799
+ "540": "Linemen and servicemen, telegraph, telephone, and power",
800
+ "541": "Locomotive engineers",
801
+ "542": "Locomotive firemen",
802
+ "543": "Loom fixers",
803
+ "544": "Machinists",
804
+ "545": "Mechanics and repairmen, airplane",
805
+ "550": "Mechanics and repairmen, automobile",
806
+ "551": "Mechanics and repairmen, office machine",
807
+ "552": "Mechanics and repairmen, radio and television",
808
+ "553": "Mechanics and repairmen, railroad and car shop",
809
+ "554": "Mechanics and repairmen (n.e.c.)",
810
+ "555": "Millers, grain, flour, feed, etc.",
811
+ "560": "Millwrights",
812
+ "561": "Molders, metal",
813
+ "562": "Motion picture projectionists",
814
+ "563": "Opticians and lens grinders and polishers",
815
+ "564": "Painters, construction and maintenance",
816
+ "565": "Paperhangers",
817
+ "570": "Pattern and model makers, except paper",
818
+ "571": "Photoengravers and lithographers",
819
+ "572": "Piano and organ tuners and repairmen",
820
+ "573": "Plasterers",
821
+ "574": "Plumbers and pipe fitters",
822
+ "575": "Pressmen and plate printers, printing",
823
+ "580": "Rollers and roll hands, metal",
824
+ "581": "Roofers and slaters",
825
+ "582": "Shoemakers and repairers, except factory",
826
+ "583": "Stationary engineers",
827
+ "584": "Stone cutters and stone carvers",
828
+ "585": "Structural metal workers",
829
+ "590": "Tailors and tailoresses",
830
+ "591": "Tinsmiths, coppersmiths, and sheet metal workers",
831
+ "592": "Tool makers, and die makers and setters",
832
+ "593": "Upholsterers",
833
+ "594": "Craftsmen and kindred workers (n.e.c.)",
834
+ "595": "Members of the armed services",
835
+ "600": "Apprentice auto mechanics",
836
+ "601": "Apprentice bricklayers and masons",
837
+ "602": "Apprentice carpenters",
838
+ "603": "Apprentice electricians",
839
+ "604": "Apprentice machinists and toolmakers",
840
+ "605": "Apprentice mechanics, except auto",
841
+ "610": "Apprentice plumbers and pipe fitters",
842
+ "611": "Apprentices, building trades (n.e.c.)",
843
+ "612": "Apprentices, metalworking trades (n.e.c.)",
844
+ "613": "Apprentices, printing trades",
845
+ "614": "Apprentices, other specified trades",
846
+ "615": "Apprentices, trade not specified",
847
+ "620": "Asbestos and insulation workers",
848
+ "621": "Attendants, auto service and parking",
849
+ "622": "Blasters and powdermen",
850
+ "623": "Boatmen, canalmen, and lock keepers",
851
+ "624": "Brakemen, railroad",
852
+ "625": "Bus drivers",
853
+ "630": "Chainmen, rodmen, and axmen, surveying",
854
+ "631": "Conductors, bus and street railway",
855
+ "632": "Deliverymen and routemen",
856
+ "633": "Dressmakers and seamstresses, except factory",
857
+ "634": "Dyers",
858
+ "635": "Filers, grinders, and polishers, metal",
859
+ "640": "Fruit, nut, and vegetable graders, and packers, except factory",
860
+ "641": "Furnacemen, smeltermen and pourers",
861
+ "642": "Heaters, metal",
862
+ "643": "Laundry and dry cleaning operatives",
863
+ "644": "Meat cutters, except slaughter and packing house",
864
+ "645": "Milliners",
865
+ "650": "Mine operatives and laborers",
866
+ "660": "Motormen, mine, factory, logging camp, etc.",
867
+ "661": "Motormen, street, subway, and elevated railway",
868
+ "662": "Oilers and greaser, except auto",
869
+ "670": "Painters, except construction or maintenance",
870
+ "671": "Photographic process workers",
871
+ "672": "Power station operators",
872
+ "673": "Sailors and deck hands",
873
+ "674": "Sawyers",
874
+ "675": "Spinners, textile",
875
+ "680": "Stationary firemen",
876
+ "681": "Switchmen, railroad",
877
+ "682": "Taxicab drivers and chauffeurs",
878
+ "683": "Truck and tractor drivers",
879
+ "684": "Weavers, textile",
880
+ "685": "Welders and flame cutters",
881
+ "690": "Operative and kindred workers (n.e.c.)",
882
+ "700": "Housekeepers, private household",
883
+ "710": "Laundresses, private household",
884
+ "720": "Private household workers (n.e.c.)",
885
+ "730": "Attendants, hospital and other institution",
886
+ "731": "Attendants, professional and personal service (n.e.c.)",
887
+ "732": "Attendants, recreation and amusement",
888
+ "740": "Barbers, beauticians, and manicurists",
889
+ "750": "Bartenders",
890
+ "751": "Bootblacks",
891
+ "752": "Boarding and lodging house keepers",
892
+ "753": "Charwomen and cleaners",
893
+ "754": "Cooks, except private household",
894
+ "760": "Counter and fountain workers",
895
+ "761": "Elevator operators",
896
+ "762": "Firemen, fire protection",
897
+ "763": "Guards, watchmen, and doorkeepers",
898
+ "764": "Housekeepers and stewards, except private household",
899
+ "770": "Janitors and sextons",
900
+ "771": "Marshals and constables",
901
+ "772": "Midwives",
902
+ "773": "Policemen and detectives",
903
+ "780": "Porters",
904
+ "781": "Practical nurses",
905
+ "782": "Sheriffs and bailiffs",
906
+ "783": "Ushers, recreation and amusement",
907
+ "784": "Waiters and waitresses",
908
+ "785": "Watchmen (crossing) and bridge tenders",
909
+ "790": "Service workers, except private household (n.e.c.)",
910
+ "810": "Farm foremen",
911
+ "820": "Farm laborers, wage workers",
912
+ "830": "Farm laborers, unpaid family workers",
913
+ "840": "Farm service laborers, self-employed",
914
+ "910": "Fishermen and oystermen",
915
+ "920": "Garage laborers and car washers and greasers",
916
+ "930": "Gardeners, except farm and groundskeepers",
917
+ "940": "Longshoremen and stevedores",
918
+ "950": "Lumbermen, raftsmen, and woodchoppers",
919
+ "960": "Teamsters",
920
+ "970": "Laborers (n.e.c.)",
921
+ "979": "Not yet classified",
922
+ "980": "Keeps house/housekeeping at home/housewife",
923
+ "981": "Imputed keeping house (1850-1900)",
924
+ "982": "Helping at home/helps parents/housework",
925
+ "983": "At school/student",
926
+ "984": "Retired",
927
+ "985": "Unemployed/without occupation",
928
+ "986": "Invalid/disabled w/ no occupation reported",
929
+ "987": "Inmate",
930
+ "990": "New Worker",
931
+ "991": "Gentleman/lady/at leisure",
932
+ "995": "Other non-occupational response",
933
+ "997": "Occupation missing/unknown",
934
+ "999": "N/A (blank)"
935
+ }
936
+ },
937
+ "OCCSCORE": {
938
+ "name": "Occupational Income Scores ",
939
+ "description": "OCCSCORE is a constructed variable that assigns occupational income scores to each occupation. The construction of this variable is described in \"Integrated Occupation and Industry Codes and Occupational Standing Variables in the IPUMS\" which users should read before using this variable. OCCSCORE assigns each occupation in all years a value representing the median total income (in hundreds of 1950 dollars) of all persons with that particular occupation in 1950. OCCSCORE thus provides a continuous measure of occupations, according to the economic rewards enjoyed by people working at them in 1950. The OCCSCORE variable is based on OCC1950. Alternative measures of occupational standing that are based on OCC1950 are available in EDSCOR50, ERSCOR50, NPBOSS50, PRESGL, and SEI. For information on the construction of OCC1950 and occupational standing measures, see \"Integrated Occupation and Industry Codes and Occupational Standing Variables in the IPUMS\". User caution: There is significant debate about the usefulness of composite measures of occupational standing (in the IPUMS, these variables include SEI, HWSEI, NPBOSS50, and NPBOSS90). We strongly urge researchers to read our user note on this issue and to familiarize themselves with the debates surrounding the use of these variables.",
940
+ "codes": {}
941
+ },
942
+ "PRESGL": {
943
+ "name": "Siegel Prestige Score",
944
+ "description": "PRESGL is a constructed variable that assigns a Siegel prestige score to each occupation using the occupational classification scheme available in OCC1950 variable.",
945
+ "codes": {}
946
+ },
947
+ "IND1950": {
948
+ "name": "IND1950",
949
+ "description": "IND1950 recodes information about industry into the 1950 Census Bureau industrial classification system and thus enhances comparability of industry data across all years included in the IPUMS. IND1950 was designed the same way as OCC1950 (Occupation, 1950 basis), the procedure for which is discussed in detail in \"Integrated Occupation and Industry Codes and Occupational Standing Variables in the IPUMS.\" The composition of the industry categories is described in detail in U.S. Bureau of the Census, Alphabetic Index of Occupations and Industries: 1950 (Washington, D.C., 1950).\n\nThe term \"labor force\" and other key employment concepts are defined in the documentation for EMPSTAT and LABFORCE.\n\nIn the 1850-1930 samples, the universe for IND1950 relied on persons having an occupation recorded in OCC.\n\nA parallel variable called IND1990, available for the samples from 1950 onward, codes industries into a simplified version of the 1990 industrial coding scheme.",
950
+ "codes": {
951
+ "0": "N/A or none reported",
952
+ "105": "Agriculture",
953
+ "116": "Forestry",
954
+ "126": "Fisheries",
955
+ "206": "Metal mining",
956
+ "216": "Coal mining",
957
+ "226": "Crude petroleum and natural gas extraction",
958
+ "236": "Nonmetallic mining and quarrying, except fuel",
959
+ "239": "Mining, not specified",
960
+ "246": "Construction",
961
+ "306": "Logging",
962
+ "307": "Sawmills, planing mills, and mill work",
963
+ "308": "Miscellaneous wood products",
964
+ "309": "Furniture and fixtures",
965
+ "316": "Glass and glass products",
966
+ "317": "Cement, concrete, gypsum and plaster products",
967
+ "318": "Structural clay products",
968
+ "319": "Pottery and related products",
969
+ "326": "Miscellaneous nonmetallic mineral and stone products",
970
+ "336": "Blast furnaces, steel works, and rolling mills",
971
+ "337": "Other primary iron and steel industries",
972
+ "338": "Primary nonferrous industries",
973
+ "346": "Fabricated steel products",
974
+ "347": "Fabricated nonferrous metal products",
975
+ "348": "Not specified metal industries",
976
+ "356": "Agricultural machinery and tractors",
977
+ "357": "Office and store machines and devices",
978
+ "358": "Miscellaneous machinery",
979
+ "367": "Electrical machinery, equipment, and supplies",
980
+ "376": "Motor vehicles and motor vehicle equipment",
981
+ "377": "Aircraft and parts",
982
+ "378": "Ship and boat building and repairing",
983
+ "379": "Railroad and miscellaneous transportation equipment",
984
+ "386": "Professional equipment and supplies",
985
+ "387": "Photographic equipment and supplies",
986
+ "388": "Watches, clocks, and clockwork-operated devices",
987
+ "399": "Miscellaneous manufacturing industries",
988
+ "406": "Meat products",
989
+ "407": "Dairy products",
990
+ "408": "Canning and preserving fruits, vegetables, and seafoods",
991
+ "409": "Grain-mill products",
992
+ "416": "Bakery products",
993
+ "417": "Confectionery and related products",
994
+ "418": "Beverage industries",
995
+ "419": "Miscellaneous food preparations and kindred products",
996
+ "426": "Not specified food industries",
997
+ "429": "Tobacco manufactures",
998
+ "436": "Knitting mills",
999
+ "437": "Dyeing and finishing textiles, except knit goods",
1000
+ "438": "Carpets, rugs, and other floor coverings",
1001
+ "439": "Yarn, thread, and fabric mills",
1002
+ "446": "Miscellaneous textile mill products",
1003
+ "448": "Apparel and accessories",
1004
+ "449": "Miscellaneous fabricated textile products",
1005
+ "456": "Pulp, paper, and paperboard mills",
1006
+ "457": "Paperboard containers and boxes",
1007
+ "458": "Miscellaneous paper and pulp products",
1008
+ "459": "Printing, publishing, and allied industries",
1009
+ "466": "Synthetic fibers",
1010
+ "467": "Drugs and medicines",
1011
+ "468": "Paints, varnishes, and related products",
1012
+ "469": "Miscellaneous chemicals and allied products",
1013
+ "476": "Petroleum refining",
1014
+ "477": "Miscellaneous petroleum and coal products",
1015
+ "478": "Rubber products",
1016
+ "487": "Leather: tanned, curried, and finished",
1017
+ "488": "Footwear, except rubber",
1018
+ "489": "Leather products, except footwear",
1019
+ "499": "Not specified manufacturing industries",
1020
+ "506": "Railroads and railway express service",
1021
+ "516": "Street railways and bus lines",
1022
+ "526": "Trucking service",
1023
+ "527": "Warehousing and storage",
1024
+ "536": "Taxicab service",
1025
+ "546": "Water transportation",
1026
+ "556": "Air transportation",
1027
+ "567": "Petroleum and gasoline pipe lines",
1028
+ "568": "Services incidental to transportation",
1029
+ "578": "Telephone",
1030
+ "579": "Telegraph",
1031
+ "586": "Electric light and power",
1032
+ "587": "Gas and steam supply systems",
1033
+ "588": "Electric-gas utilities",
1034
+ "596": "Water supply",
1035
+ "597": "Sanitary services",
1036
+ "598": "Other and not specified utilities",
1037
+ "606": "Motor vehicles and equipment",
1038
+ "607": "Drugs, chemicals, and allied products",
1039
+ "608": "Dry goods apparel",
1040
+ "609": "Food and related products",
1041
+ "616": "Electrical goods, hardware, and plumbing equipment",
1042
+ "617": "Machinery, equipment, and supplies",
1043
+ "618": "Petroleum products",
1044
+ "619": "Farm products--raw materials",
1045
+ "626": "Miscellaneous wholesale trade",
1046
+ "627": "Not specified wholesale trade",
1047
+ "636": "Food stores, except dairy products",
1048
+ "637": "Dairy products stores and milk retailing",
1049
+ "646": "General merchandise stores",
1050
+ "647": "Five and ten cent stores",
1051
+ "656": "Apparel and accessories stores, except shoe",
1052
+ "657": "Shoe stores",
1053
+ "658": "Furniture and house furnishing stores",
1054
+ "659": "Household appliance and radio stores",
1055
+ "667": "Motor vehicles and accessories retailing",
1056
+ "668": "Gasoline service stations",
1057
+ "669": "Drug stores",
1058
+ "679": "Eating and drinking places",
1059
+ "686": "Hardware and farm implement stores",
1060
+ "687": "Lumber and building material retailing",
1061
+ "688": "Liquor stores",
1062
+ "689": "Retail florists",
1063
+ "696": "Jewelry stores",
1064
+ "697": "Fuel and ice retailing",
1065
+ "698": "Miscellaneous retail stores",
1066
+ "699": "Not specified retail trade",
1067
+ "716": "Banking and credit agencies",
1068
+ "726": "Security and commodity brokerage and investment companies",
1069
+ "736": "Insurance",
1070
+ "746": "Real estate",
1071
+ "756": "Real estate-insurance-law offices",
1072
+ "806": "Advertising",
1073
+ "807": "Accounting, auditing, and bookkeeping services",
1074
+ "808": "Miscellaneous business services",
1075
+ "816": "Auto repair services and garages",
1076
+ "817": "Miscellaneous repair services",
1077
+ "826": "Private households",
1078
+ "836": "Hotels and lodging places",
1079
+ "846": "Laundering, cleaning, and dyeing services",
1080
+ "847": "Dressmaking shops",
1081
+ "848": "Shoe repair shops",
1082
+ "849": "Miscellaneous personal services",
1083
+ "856": "Radio broadcasting and television",
1084
+ "857": "Theaters and motion pictures",
1085
+ "858": "Bowling alleys, and billiard and pool parlors",
1086
+ "859": "Miscellaneous entertainment and recreation services",
1087
+ "868": "Medical and other health services, except hospitals",
1088
+ "869": "Hospitals",
1089
+ "879": "Legal services",
1090
+ "888": "Educational services",
1091
+ "896": "Welfare and religious services",
1092
+ "897": "Nonprofit membership organizations",
1093
+ "898": "Engineering and architectural services",
1094
+ "899": "Miscellaneous professional and related services",
1095
+ "906": "Postal service",
1096
+ "916": "Federal public administration",
1097
+ "926": "State public administration",
1098
+ "936": "Local public administration",
1099
+ "946": "Public Administration, level not specified",
1100
+ "976": "Common or general laborer",
1101
+ "979": "Not yet specified",
1102
+ "980": "Unpaid domestic work",
1103
+ "982": "Housework at home",
1104
+ "983": "School response (students, etc.)",
1105
+ "984": "Retired",
1106
+ "986": "Sick/disabled",
1107
+ "987": "Institution response",
1108
+ "991": "Lady/Man of leisure",
1109
+ "995": "Non-industrial response",
1110
+ "997": "Nonclassifiable",
1111
+ "998": "Industry not reported",
1112
+ "999": "Blank or blank equivalent"
1113
+ }
1114
+ },
1115
+ "LABFORCE": {
1116
+ "name": "Labor Force",
1117
+ "description": "LABFORCE is a dichotomous variable indicating whether a person participated in the labor force. See EMPSTAT for a non-dichotomous variable that indicates whether the respondent was part of the labor force -- working or seeking work -- and, if so, whether the person was currently unemployed.",
1118
+ "codes": {
1119
+ "1": "No, not in the labor force",
1120
+ "2": "Yes, in the labor force"
1121
+ }
1122
+ },
1123
+ "EMPSTAT": {
1124
+ "name": "Employment Status",
1125
+ "description": "EMPSTAT indicates whether the respondent was a part of the labor force -- working or seeking work -- and, if so, whether the person was currently unemployed. The second digit preserves additional related information available for some years but not others. See LABFORCE for a dichotomous variable that identifies whether a person participated in the labor force or not and is available for all years in the IPUMS.",
1126
+ "codes": { "-1": "N/A", "1": "Employed", "2": "Unemployed", "3": "Not in labor force" }
1127
+ },
1128
+ "INCWAGE": {
1129
+ "name": "Pre-tax wage and Salary Income",
1130
+ "description": "INCWAGE reports each respondent's total pre-tax wage and salary income - that is, money received as an employee - for the previous year. ",
1131
+ "codes": {}
1132
+ }
1133
+ }
1134
+
utils.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+
3
# Variable names that remove_oov strips from every variable list and
# filter mapping.
oov = ["HISPAN", "FAMUNIT", "SCHOOL", "LABFORCE"]

# Variable names that is_numerical reports as numerical.
numericals = {"YEAR"}
5
+
6
def get_variable_filter(const_path):
    """Load a constants JSON file and return it with excluded variables removed.

    Args:
        const_path: Path to a JSON file. The parsed document is handed
            straight to ``remove_oov``, so it must contain whatever keys
            that function reads.

    Returns:
        The value returned by ``remove_oov`` for the parsed document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Decode explicitly as UTF-8 (the JSON interchange encoding) instead of
    # relying on the platform's locale encoding, which differs across systems.
    with open(const_path, 'r', encoding='utf-8') as f:
        const = json.load(f)
    return remove_oov(const)
10
+
11
def remove_oov(const):
    """Drop every name listed in the module-level ``oov`` from ``const``.

    Args:
        const: Mapping with a ``'variables'`` entry (key -> list of names)
            and a ``'filters'`` entry (name -> list of names).

    Returns:
        A ``(variables, filters)`` tuple of new dicts. ``variables`` keeps
        every key with its list stripped of excluded names; ``filters``
        omits excluded keys entirely and strips excluded names from the
        remaining lists.
    """
    def keep(names):
        # Preserve the original order, skipping excluded names.
        return [name for name in names if name not in oov]

    variables = {
        graph: keep(varlist)
        for graph, varlist in const['variables'].items()
    }
    filters = {
        var: keep(targets)
        for var, targets in const['filters'].items()
        if var not in oov
    }
    return variables, filters
21
+
22
def is_numerical(var):
    """Return True when ``var`` is a member of the module-level ``numericals`` set."""
    known_numerical = numericals
    return var in known_numerical