Spaces:
Runtime error
Runtime error
import json | |
import altair as alt | |
import pandas as pd | |
import streamlit as st | |
st.set_page_config(layout="wide") | |
from utils import get_variable_filter, is_numerical | |
def check_codes(var, data_meta): | |
return 'codes' in data_meta[var] and data_meta[var]['codes'] is not None and len(data_meta[var]['codes']) > 0 | |
def load_data(): | |
with st.spinner('loading data...'): | |
data_main = pd.read_parquet('data/ipums_full_count_nyc_census_coded_20210801.parquet') | |
with open('data/meta.json', 'r') as f: | |
data_meta = json.load(f) | |
data_variable, data_filter = get_variable_filter('data/cons.json') | |
name2id = {} | |
for k, v in data_meta.items(): | |
if check_codes(k, data_meta): | |
name2id[k] = {vv: int(kk) for kk, vv in v['codes'].items()} | |
return data_main, data_meta, data_variable, data_filter, name2id | |
DATA_MAIN, DATA_META, DATA_VARIABLE, DATA_FILTER, NAME2ID = load_data() | |
def id2name(var, id_): | |
id_ = str(id_) | |
if var not in DATA_META or 'codes' not in DATA_META[var]: | |
return id_ | |
return DATA_META[var]['codes'].get(id_, id_) | |
def name2id(var, name): | |
if var in NAME2ID: | |
return NAME2ID[var][name] | |
return name | |
def main(): | |
# st.write(df.head()) | |
load_data() | |
charts = { | |
'Area Chart': area, | |
'Line Graph': line, | |
'Scatter Plot': scatter, | |
'Bar Chart': bar, | |
'Box Plot': box, | |
'Heatmap': heat, | |
'Histogram': hist, | |
} | |
# st.title('HRL Portal') | |
with st.sidebar: | |
st.subheader('Chart Type') | |
chart = st.selectbox('Select a chart type:', list(charts.keys())) | |
# chart = st.selectbox('Select a chart type:', list(charts.keys()), label_visibility='collapsed') | |
# st.write(DATA_VARIABLE) | |
# st.write(DATA_FILTER) | |
charts[chart]() | |
def get_unique(var): | |
if check_codes(var, DATA_META): | |
return list(DATA_META[var]['codes'].values()) | |
return DATA_MAIN[var].unique().tolist() | |
def get_var_name(var): | |
if var in DATA_META and 'name' in DATA_META[var]: | |
name = DATA_META[var]['name'] | |
if name == var: | |
return var | |
return f"{var} ({name})" | |
return var | |
def name2var(name): | |
return name.split()[0] | |
def meta(): | |
st.header('Variables') | |
for k, v in DATA_META.items(): | |
st.subheader(k) | |
st.write(v['description']) | |
def area(): | |
with st.sidebar: | |
with st.form("form1"): | |
st.form_submit_button("Apply") | |
st.subheader('Variable') | |
var_name = st.selectbox('Select a variable:', [get_var_name(var) for var in DATA_VARIABLE['area']]) | |
# var_name = st.selectbox('Select a variable:', [get_var_name(var) for var in DATA_VARIABLE['area']], label_visibility='collapsed') | |
var = name2var(var_name) | |
st.subheader('Filters') | |
filters = {} | |
for fvar in DATA_FILTER[var]: | |
if fvar == 'YEAR': | |
continue | |
if is_numerical(fvar): | |
names = st.slider(fvar, min(get_unique(fvar)), max(get_unique(fvar)), | |
value=(min(get_unique(fvar)), max(get_unique(fvar)))) | |
else: | |
names = st.multiselect(fvar, get_unique(fvar), default=get_unique(fvar)) | |
filters[fvar] = set([name2id(fvar, n) for n in names]) | |
st.header(f'Area Chart - {var_name}') | |
# with st.expander(f'{var}'): | |
# st.write(DATA_META.get(var, {}).get('description', '')) | |
# st.write(len([v for k, v in DATA_META[var]['codes'].items() if k != -1])) | |
# vals = set([name2id(var, n) for n in names]) | |
with st.form("form2"): | |
container = st.container() | |
all = st.form_submit_button("Select all") | |
if all: | |
names = container.multiselect(f"Select one or more {var} values to display:", | |
get_unique(var), [v for k, v in DATA_META[var]['codes'].items() if k != -1]) | |
else: | |
names = container.multiselect(f"Select one or more {var} values to display:", | |
get_unique(var), | |
default=['Authors', 'Musicians and music teachers', 'Telephone operators', | |
'Bus drivers', 'Cashiers', 'Architects', | |
'Chemists'] if var == 'OCC1950' else ['Drugs and medicines', | |
'Fisheries', | |
'Glass and glass products', ]) | |
st.form_submit_button("Apply") | |
df = get_area_data(var, filters) | |
df = df[df[var].isin(names)] | |
# st.write(df) | |
if len(df) == 0: | |
st.warning('Select some data to display') | |
return | |
selection = alt.selection_multi(fields=[var], bind='legend') | |
plot = alt.Chart(df, title=f'Count of Different {var} Values').mark_area().encode(alt.X('YEAR'), | |
alt.Y('count', title='count', | |
stack='zero'), | |
alt.Color(var, | |
scale=alt.Scale( | |
scheme='category20'), | |
legend=alt.Legend( | |
# orient='bottom', | |
# columns=3 | |
)), | |
opacity=alt.condition(selection, | |
alt.value( | |
1), | |
alt.value( | |
0.3)), | |
tooltip=['YEAR', var, 'count'] | |
) \ | |
.properties(height=400 + len(names)).add_selection(selection) | |
st.altair_chart(plot, use_container_width=True) | |
def get_area_data(var, filters=None): | |
# for k, v in filters.items(): | |
# st.write(k) | |
# st.write(v) | |
# st.write(DATA_MAIN[k].unique()) | |
# st.write(len(DATA_MAIN)) | |
if filters is not None: | |
df = DATA_MAIN[['YEAR', var, *filters.keys()]] | |
# df = df[df[var].isin(vals)] | |
with st.spinner('filtering...'): | |
for fvar, fvals in filters.items(): | |
df = df[df[fvar].isin(fvals)] | |
df = df[['YEAR', var]] | |
else: | |
df = DATA_MAIN[['YEAR', var]] | |
# st.write(len(df)) | |
# st.write(df.head()) | |
with st.spinner('counting...'): | |
groups = df.groupby([var, 'YEAR'])[var].count().to_frame().rename(columns={var: 'count'}).reset_index() | |
groups[var] = groups[var].apply(lambda x: id2name(var, x)) | |
return groups | |
def line(): | |
st.header('Line Graph') | |
def scatter(): | |
st.header('Scatter Plot') | |
def bar(): | |
st.header('Bar Chart') | |
def box(): | |
st.header('Box Plot') | |
def heat(): | |
st.header('Heatmap') | |
def hist(): | |
st.header('Histogram') | |
if __name__ == '__main__': | |
main() | |