Spaces:
Sleeping
Sleeping
| import datetime | |
| import os | |
| import pathlib | |
| import requests | |
| import zipfile | |
| import pandas as pd | |
| import pydeck as pdk | |
| import geopandas as gpd | |
| import streamlit as st | |
| import leafmap.colormaps as cm | |
| from leafmap.common import hex_to_rgb | |
| st.set_page_config(layout="wide") | |
| STREAMLIT_STATIC_PATH = pathlib.Path(st.__path__[0]) / "static" | |
| # We create a downloads directory within the streamlit static asset directory | |
| # and we write output files to it | |
| DOWNLOADS_PATH = STREAMLIT_STATIC_PATH / "downloads" | |
| if not DOWNLOADS_PATH.is_dir(): | |
| DOWNLOADS_PATH.mkdir() | |
| # Data source: https://www.realtor.com/research/data/ | |
| # link_prefix = "https://econdata.s3-us-west-2.amazonaws.com/Reports/" | |
| link_prefix = "https://raw.githubusercontent.com/giswqs/data/main/housing/" | |
| data_links = { | |
| "weekly": { | |
| "national": link_prefix + "Core/listing_weekly_core_aggregate_by_country.csv", | |
| "metro": link_prefix + "Core/listing_weekly_core_aggregate_by_metro.csv", | |
| }, | |
| "monthly_current": { | |
| "national": link_prefix + "Core/RDC_Inventory_Core_Metrics_Country.csv", | |
| "state": link_prefix + "Core/RDC_Inventory_Core_Metrics_State.csv", | |
| "metro": link_prefix + "Core/RDC_Inventory_Core_Metrics_Metro.csv", | |
| "county": link_prefix + "Core/RDC_Inventory_Core_Metrics_County.csv", | |
| "zip": link_prefix + "Core/RDC_Inventory_Core_Metrics_Zip.csv", | |
| }, | |
| "monthly_historical": { | |
| "national": link_prefix + "Core/RDC_Inventory_Core_Metrics_Country_History.csv", | |
| "state": link_prefix + "Core/RDC_Inventory_Core_Metrics_State_History.csv", | |
| "metro": link_prefix + "Core/RDC_Inventory_Core_Metrics_Metro_History.csv", | |
| "county": link_prefix + "Core/RDC_Inventory_Core_Metrics_County_History.csv", | |
| "zip": link_prefix + "Core/RDC_Inventory_Core_Metrics_Zip_History.csv", | |
| }, | |
| "hotness": { | |
| "metro": link_prefix | |
| + "Hotness/RDC_Inventory_Hotness_Metrics_Metro_History.csv", | |
| "county": link_prefix | |
| + "Hotness/RDC_Inventory_Hotness_Metrics_County_History.csv", | |
| "zip": link_prefix + "Hotness/RDC_Inventory_Hotness_Metrics_Zip_History.csv", | |
| }, | |
| } | |
| def get_data_columns(df, category, frequency="monthly"): | |
| if frequency == "monthly": | |
| if category.lower() == "county": | |
| del_cols = ["month_date_yyyymm", "county_fips", "county_name"] | |
| elif category.lower() == "state": | |
| del_cols = ["month_date_yyyymm", "state", "state_id"] | |
| elif category.lower() == "national": | |
| del_cols = ["month_date_yyyymm", "country"] | |
| elif category.lower() == "metro": | |
| del_cols = ["month_date_yyyymm", "cbsa_code", "cbsa_title", "HouseholdRank"] | |
| elif category.lower() == "zip": | |
| del_cols = ["month_date_yyyymm", "postal_code", "zip_name", "flag"] | |
| elif frequency == "weekly": | |
| if category.lower() == "national": | |
| del_cols = ["week_end_date", "geo_country"] | |
| elif category.lower() == "metro": | |
| del_cols = ["week_end_date", "cbsa_code", "cbsa_title", "hh_rank"] | |
| cols = df.columns.values.tolist() | |
| for col in cols: | |
| if col.strip() in del_cols: | |
| cols.remove(col) | |
| if category.lower() == "metro": | |
| return cols[2:] | |
| else: | |
| return cols[1:] | |
| def get_inventory_data(url): | |
| df = pd.read_csv(url) | |
| url = url.lower() | |
| if "county" in url: | |
| df["county_fips"] = df["county_fips"].map(str) | |
| df["county_fips"] = df["county_fips"].str.zfill(5) | |
| elif "state" in url: | |
| df["STUSPS"] = df["state_id"].str.upper() | |
| elif "metro" in url: | |
| df["cbsa_code"] = df["cbsa_code"].map(str) | |
| elif "zip" in url: | |
| df["postal_code"] = df["postal_code"].map(str) | |
| df["postal_code"] = df["postal_code"].str.zfill(5) | |
| if "listing_weekly_core_aggregate_by_country" in url: | |
| columns = get_data_columns(df, "national", "weekly") | |
| for column in columns: | |
| if column != "median_days_on_market_by_day_yy": | |
| df[column] = df[column].str.rstrip("%").astype(float) / 100 | |
| if "listing_weekly_core_aggregate_by_metro" in url: | |
| columns = get_data_columns(df, "metro", "weekly") | |
| for column in columns: | |
| if column != "median_days_on_market_by_day_yy": | |
| df[column] = df[column].str.rstrip("%").astype(float) / 100 | |
| df["cbsa_code"] = df["cbsa_code"].str[:5] | |
| return df | |
| def filter_weekly_inventory(df, week): | |
| df = df[df["week_end_date"] == week] | |
| return df | |
| def get_start_end_year(df): | |
| start_year = int(str(df["month_date_yyyymm"].min())[:4]) | |
| end_year = int(str(df["month_date_yyyymm"].max())[:4]) | |
| return start_year, end_year | |
| def get_periods(df): | |
| return [str(d) for d in list(set(df["month_date_yyyymm"].tolist()))] | |
| def get_geom_data(category): | |
| prefix = ( | |
| "https://raw.githubusercontent.com/giswqs/streamlit-geospatial/master/data/" | |
| ) | |
| links = { | |
| "national": prefix + "us_nation.geojson", | |
| "state": prefix + "us_states.geojson", | |
| "county": prefix + "us_counties.geojson", | |
| "metro": prefix + "us_metro_areas.geojson", | |
| "zip": "https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_zcta510_500k.zip", | |
| } | |
| if category.lower() == "zip": | |
| r = requests.get(links[category]) | |
| out_zip = os.path.join(DOWNLOADS_PATH, "cb_2018_us_zcta510_500k.zip") | |
| with open(out_zip, "wb") as code: | |
| code.write(r.content) | |
| zip_ref = zipfile.ZipFile(out_zip, "r") | |
| zip_ref.extractall(DOWNLOADS_PATH) | |
| gdf = gpd.read_file(out_zip.replace("zip", "shp")) | |
| else: | |
| gdf = gpd.read_file(links[category]) | |
| return gdf | |
| def join_attributes(gdf, df, category): | |
| new_gdf = None | |
| if category == "county": | |
| new_gdf = gdf.merge(df, left_on="GEOID", right_on="county_fips", how="outer") | |
| elif category == "state": | |
| new_gdf = gdf.merge(df, left_on="STUSPS", right_on="STUSPS", how="outer") | |
| elif category == "national": | |
| if "geo_country" in df.columns.values.tolist(): | |
| df["country"] = None | |
| df.loc[0, "country"] = "United States" | |
| new_gdf = gdf.merge(df, left_on="NAME", right_on="country", how="outer") | |
| elif category == "metro": | |
| new_gdf = gdf.merge(df, left_on="CBSAFP", right_on="cbsa_code", how="outer") | |
| elif category == "zip": | |
| new_gdf = gdf.merge(df, left_on="GEOID10", right_on="postal_code", how="outer") | |
| return new_gdf | |
| def select_non_null(gdf, col_name): | |
| new_gdf = gdf[~gdf[col_name].isna()] | |
| return new_gdf | |
| def select_null(gdf, col_name): | |
| new_gdf = gdf[gdf[col_name].isna()] | |
| return new_gdf | |
| def get_data_dict(name): | |
| in_csv = os.path.join(os.getcwd(), "data/realtor_data_dict.csv") | |
| df = pd.read_csv(in_csv) | |
| label = list(df[df["Name"] == name]["Label"])[0] | |
| desc = list(df[df["Name"] == name]["Description"])[0] | |
| return label, desc | |
| def get_weeks(df): | |
| seq = list(set(df[~df["week_end_date"].isnull()]["week_end_date"].tolist())) | |
| weeks = [ | |
| datetime.date(int(d.split("/")[2]), int(d.split("/")[0]), int(d.split("/")[1])) | |
| for d in seq | |
| ] | |
| weeks.sort() | |
| return weeks | |
| def get_saturday(in_date): | |
| idx = (in_date.weekday() + 1) % 7 | |
| sat = in_date + datetime.timedelta(6 - idx) | |
| return sat | |
| def app(): | |
| st.title("U.S. Real Estate Data and Market Trends") | |
| st.markdown( | |
| """**Introduction:** This interactive dashboard is designed for visualizing U.S. real estate data and market trends at multiple levels (i.e., national, | |
| state, county, and metro). The data sources include [Real Estate Data](https://www.realtor.com/research/data) from realtor.com and | |
| [Cartographic Boundary Files](https://www.census.gov/geographies/mapping-files/time-series/geo/carto-boundary-file.html) from U.S. Census Bureau. | |
| Several open-source packages are used to process the data and generate the visualizations, e.g., [streamlit](https://streamlit.io), | |
| [geopandas](https://geopandas.org), [leafmap](https://leafmap.org), and [pydeck](https://deckgl.readthedocs.io). | |
| """ | |
| ) | |
| with st.expander("See a demo"): | |
| st.image("https://i.imgur.com/Z3dk6Tr.gif") | |
| row1_col1, row1_col2, row1_col3, row1_col4, row1_col5 = st.columns( | |
| [0.6, 0.8, 0.6, 1.4, 2] | |
| ) | |
| with row1_col1: | |
| frequency = st.selectbox("Monthly/weekly data", ["Monthly", "Weekly"]) | |
| with row1_col2: | |
| types = ["Current month data", "Historical data"] | |
| if frequency == "Weekly": | |
| types.remove("Current month data") | |
| cur_hist = st.selectbox( | |
| "Current/historical data", | |
| types, | |
| ) | |
| with row1_col3: | |
| if frequency == "Monthly": | |
| scale = st.selectbox( | |
| "Scale", ["National", "State", "Metro", "County"], index=3 | |
| ) | |
| else: | |
| scale = st.selectbox("Scale", ["National", "Metro"], index=1) | |
| gdf = get_geom_data(scale.lower()) | |
| if frequency == "Weekly": | |
| inventory_df = get_inventory_data(data_links["weekly"][scale.lower()]) | |
| weeks = get_weeks(inventory_df) | |
| with row1_col1: | |
| selected_date = st.date_input("Select a date", value=weeks[-1]) | |
| saturday = get_saturday(selected_date) | |
| selected_period = saturday.strftime("%-m/%-d/%Y") | |
| if saturday not in weeks: | |
| st.error( | |
| "The selected date is not available in the data. Please select a date between {} and {}".format( | |
| weeks[0], weeks[-1] | |
| ) | |
| ) | |
| selected_period = weeks[-1].strftime("%-m/%-d/%Y") | |
| inventory_df = get_inventory_data(data_links["weekly"][scale.lower()]) | |
| inventory_df = filter_weekly_inventory(inventory_df, selected_period) | |
| if frequency == "Monthly": | |
| if cur_hist == "Current month data": | |
| inventory_df = get_inventory_data( | |
| data_links["monthly_current"][scale.lower()] | |
| ) | |
| selected_period = get_periods(inventory_df)[0] | |
| else: | |
| with row1_col2: | |
| inventory_df = get_inventory_data( | |
| data_links["monthly_historical"][scale.lower()] | |
| ) | |
| start_year, end_year = get_start_end_year(inventory_df) | |
| periods = get_periods(inventory_df) | |
| with st.expander("Select year and month", True): | |
| selected_year = st.slider( | |
| "Year", | |
| start_year, | |
| end_year, | |
| value=start_year, | |
| step=1, | |
| ) | |
| selected_month = st.slider( | |
| "Month", | |
| min_value=1, | |
| max_value=12, | |
| value=int(periods[0][-2:]), | |
| step=1, | |
| ) | |
| selected_period = str(selected_year) + str(selected_month).zfill(2) | |
| if selected_period not in periods: | |
| st.error("Data not available for selected year and month") | |
| selected_period = periods[0] | |
| inventory_df = inventory_df[ | |
| inventory_df["month_date_yyyymm"] == int(selected_period) | |
| ] | |
| data_cols = get_data_columns(inventory_df, scale.lower(), frequency.lower()) | |
| with row1_col4: | |
| selected_col = st.selectbox("Attribute", data_cols) | |
| with row1_col5: | |
| show_desc = st.checkbox("Show attribute description") | |
| if show_desc: | |
| try: | |
| label, desc = get_data_dict(selected_col.strip()) | |
| markdown = f""" | |
| **{label}**: {desc} | |
| """ | |
| st.markdown(markdown) | |
| except: | |
| st.warning("No description available for selected attribute") | |
| row2_col1, row2_col2, row2_col3, row2_col4, row2_col5, row2_col6 = st.columns( | |
| [0.6, 0.68, 0.7, 0.7, 1.5, 0.8] | |
| ) | |
| palettes = cm.list_colormaps() | |
| with row2_col1: | |
| palette = st.selectbox("Color palette", palettes, index=palettes.index("Blues")) | |
| with row2_col2: | |
| n_colors = st.slider("Number of colors", min_value=2, max_value=20, value=8) | |
| with row2_col3: | |
| show_nodata = st.checkbox("Show nodata areas", value=True) | |
| with row2_col4: | |
| show_3d = st.checkbox("Show 3D view", value=False) | |
| with row2_col5: | |
| if show_3d: | |
| elev_scale = st.slider( | |
| "Elevation scale", min_value=1, max_value=1000000, value=1, step=10 | |
| ) | |
| with row2_col6: | |
| st.info("Press Ctrl and move the left mouse button.") | |
| else: | |
| elev_scale = 1 | |
| gdf = join_attributes(gdf, inventory_df, scale.lower()) | |
| gdf_null = select_null(gdf, selected_col) | |
| gdf = select_non_null(gdf, selected_col) | |
| gdf = gdf.sort_values(by=selected_col, ascending=True) | |
| colors = cm.get_palette(palette, n_colors) | |
| colors = [hex_to_rgb(c) for c in colors] | |
| for i, ind in enumerate(gdf.index): | |
| index = int(i / (len(gdf) / len(colors))) | |
| if index >= len(colors): | |
| index = len(colors) - 1 | |
| gdf.loc[ind, "R"] = colors[index][0] | |
| gdf.loc[ind, "G"] = colors[index][1] | |
| gdf.loc[ind, "B"] = colors[index][2] | |
| initial_view_state = pdk.ViewState( | |
| latitude=40, | |
| longitude=-100, | |
| zoom=3, | |
| max_zoom=16, | |
| pitch=0, | |
| bearing=0, | |
| height=900, | |
| width=None, | |
| ) | |
| min_value = gdf[selected_col].min() | |
| max_value = gdf[selected_col].max() | |
| color = "color" | |
| # color_exp = f"[({selected_col}-{min_value})/({max_value}-{min_value})*255, 0, 0]" | |
| color_exp = f"[R, G, B]" | |
| geojson = pdk.Layer( | |
| "GeoJsonLayer", | |
| gdf, | |
| pickable=True, | |
| opacity=0.5, | |
| stroked=True, | |
| filled=True, | |
| extruded=show_3d, | |
| wireframe=True, | |
| get_elevation=f"{selected_col}", | |
| elevation_scale=elev_scale, | |
| # get_fill_color="color", | |
| get_fill_color=color_exp, | |
| get_line_color=[0, 0, 0], | |
| get_line_width=2, | |
| line_width_min_pixels=1, | |
| ) | |
| geojson_null = pdk.Layer( | |
| "GeoJsonLayer", | |
| gdf_null, | |
| pickable=True, | |
| opacity=0.2, | |
| stroked=True, | |
| filled=True, | |
| extruded=False, | |
| wireframe=True, | |
| # get_elevation="properties.ALAND/100000", | |
| # get_fill_color="color", | |
| get_fill_color=[200, 200, 200], | |
| get_line_color=[0, 0, 0], | |
| get_line_width=2, | |
| line_width_min_pixels=1, | |
| ) | |
| # tooltip = {"text": "Name: {NAME}"} | |
| # tooltip_value = f"<b>Value:</b> {median_listing_price}"" | |
| tooltip = { | |
| "html": "<b>Name:</b> {NAME}<br><b>Value:</b> {" | |
| + selected_col | |
| + "}<br><b>Date:</b> " | |
| + selected_period | |
| + "", | |
| "style": {"backgroundColor": "steelblue", "color": "white"}, | |
| } | |
| layers = [geojson] | |
| if show_nodata: | |
| layers.append(geojson_null) | |
| r = pdk.Deck( | |
| layers=layers, | |
| initial_view_state=initial_view_state, | |
| map_style="light", | |
| tooltip=tooltip, | |
| ) | |
| row3_col1, row3_col2 = st.columns([6, 1]) | |
| with row3_col1: | |
| st.pydeck_chart(r) | |
| with row3_col2: | |
| st.write( | |
| cm.create_colormap( | |
| palette, | |
| label=selected_col.replace("_", " ").title(), | |
| width=0.2, | |
| height=3, | |
| orientation="vertical", | |
| vmin=min_value, | |
| vmax=max_value, | |
| font_size=10, | |
| ) | |
| ) | |
| row4_col1, row4_col2, row4_col3 = st.columns([1, 2, 3]) | |
| with row4_col1: | |
| show_data = st.checkbox("Show raw data") | |
| with row4_col2: | |
| show_cols = st.multiselect("Select columns", data_cols) | |
| with row4_col3: | |
| show_colormaps = st.checkbox("Preview all color palettes") | |
| if show_colormaps: | |
| st.write(cm.plot_colormaps(return_fig=True)) | |
| if show_data: | |
| if scale == "National": | |
| st.dataframe(gdf[["NAME", "GEOID"] + show_cols]) | |
| elif scale == "State": | |
| st.dataframe(gdf[["NAME", "STUSPS"] + show_cols]) | |
| elif scale == "County": | |
| st.dataframe(gdf[["NAME", "STATEFP", "COUNTYFP"] + show_cols]) | |
| elif scale == "Metro": | |
| st.dataframe(gdf[["NAME", "CBSAFP"] + show_cols]) | |
| elif scale == "Zip": | |
| st.dataframe(gdf[["GEOID10"] + show_cols]) | |
| app() | |