File size: 3,155 Bytes
475dc77 caffa37 89af670 475dc77 89af670 475dc77 89af670 475dc77 89af670 475dc77 caffa37 475dc77 caffa37 89af670 475dc77 89af670 475dc77 89af670 475dc77 89af670 475dc77 89af670 475dc77 89af670 475dc77 caffa37 475dc77 89af670 475dc77 89af670 475dc77 89af670 475dc77 caffa37 89af670 caffa37 475dc77 89af670 475dc77 caffa37 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
import gradio as gr
import pandas as pd
import plotly.graph_objects as go
import numpy as np
def plot_zip_code_correlation(zip_codes_str, start_date, end_date):
    """Plot the correlation of Zillow home values between ZIP codes.

    Downloads the Zillow ZHVI per-ZIP monthly CSV, keeps the requested ZIP
    codes and the monthly columns that fall inside the requested date range,
    computes the pairwise correlation between the ZIP codes' price series,
    and renders that matrix as a Plotly 3D surface.

    Args:
        zip_codes_str: Comma-separated ZIP codes, e.g. "07001,07002". Each
            entry is stripped and left-padded with zeros to five characters;
            blank entries and repeated ZIP codes are ignored.
        start_date: Start of the range (inclusive), in a form accepted by
            ``pandas.to_datetime`` (the UI asks for YYYY-MM-DD).
        end_date: End of the range (inclusive), same format as start_date.

    Returns:
        A ``plotly.graph_objects.Figure`` holding one ``Surface`` trace whose
        z-values are the correlation matrix.

    Raises:
        ValueError: If a date is missing or unparseable, falls before the
            year 2000, or the start is after the end; if no requested ZIP
            code is present in the data; if no monthly column lies in the
            range; or if fewer than two ZIP codes (or fewer than two shared
            months) remain to correlate.
    """
    # Validate dates on real timestamps. Comparing only the years (or the
    # raw strings) lets a reversed range inside one year slip through and
    # mis-orders non-zero-padded input such as "2020-1-5".
    start_ts = pd.to_datetime(start_date)
    end_ts = pd.to_datetime(end_date)
    if pd.isna(start_ts) or pd.isna(end_ts):
        # An empty textbox parses to NaT rather than raising.
        raise ValueError("Please enter both a start date and an end date (YYYY-MM-DD).")
    if start_ts.year < 2000 or end_ts.year < 2000:
        raise ValueError("Please select dates no earlier than the year 2000.")
    if start_ts > end_ts:
        raise ValueError("Start date must be before end date.")

    # Process ZIP codes (ensure 5-digit format). Blank entries are dropped
    # so they do not turn into "00000"; dict.fromkeys removes duplicates
    # while keeping the order the user typed.
    stripped = (part.strip() for part in (zip_codes_str or "").split(","))
    zip_codes = list(dict.fromkeys(z.zfill(5) for z in stripped if z))
    if not zip_codes:
        # Nothing to look up, so fail before downloading the CSV.
        raise ValueError("No data found for the provided ZIP codes.")

    # Load data
    df = pd.read_csv('https://files.zillowstatic.com/research/public_csvs/zhvi/Zip_zhvi_uc_sfrcondo_tier_0.33_0.67_sm_sa_month.csv')

    # Ensure ZIP codes in dataframe are strings with leading zeros
    df['RegionName'] = df['RegionName'].astype(str).str.zfill(5)
    df = df[df['RegionName'].isin(zip_codes)]
    if df.empty:
        raise ValueError("No data found for the provided ZIP codes.")

    # Extract date columns within the selected range. Columns from index 7
    # onward are probed; any header that does not parse as a date is
    # skipped as metadata.
    date_columns = []
    for col in df.columns[7:]:
        try:
            col_date = pd.to_datetime(col)
        except (ValueError, TypeError, OverflowError):
            continue
        if start_ts <= col_date <= end_ts:
            date_columns.append(col)
    if not date_columns:
        raise ValueError("No data available within the selected date range.")

    # Build price matrix: one row per requested ZIP code (in request order),
    # one column per month. drop_duplicates guarantees a single row per ZIP.
    by_zip = df.drop_duplicates(subset='RegionName').set_index('RegionName')
    present = [z for z in zip_codes if z in by_zip.index]
    prices = by_zip.loc[present, date_columns].astype(float)
    prices = prices.dropna(how='all')  # discard ZIPs with no prices in range
    valid_zip_list = list(prices.index)
    not_enough = (
        f"Not enough data for correlation calculation. Ensure at least two valid ZIP codes with overlapping data between {start_date} and {end_date}."
    )
    if len(valid_zip_list) < 2:
        raise ValueError(not_enough)

    # Months as rows, ZIP codes as columns; keep only months where every
    # ZIP code has a value so all series cover the same dates.
    price_matrix_df = prices.T.dropna()
    if len(price_matrix_df) < 2:
        # A correlation needs at least two shared observations; otherwise
        # the matrix would be all NaN and the plot empty.
        raise ValueError(not_enough)

    # Calculate correlation matrix
    corr_matrix = price_matrix_df.corr()

    # Prepare 3D plot
    z_data = corr_matrix.values
    x_data, y_data = np.meshgrid(valid_zip_list, valid_zip_list)
    fig = go.Figure(data=[go.Surface(z=z_data, x=x_data, y=y_data)])
    fig.update_layout(
        title=f'3D Correlation Matrix of Housing Prices ({start_date} to {end_date})',
        scene=dict(
            xaxis_title='ZIP Code',
            yaxis_title='ZIP Code',
            zaxis_title='Correlation',
        ),
        autosize=True
    )
    return fig
# Gradio UI: three free-text inputs are passed, in order, to
# plot_zip_code_correlation(zip_codes_str, start_date, end_date); the
# returned Plotly figure is shown in a Plot component.
iface = gr.Interface(
    fn=plot_zip_code_correlation,
    inputs=[
        gr.Textbox(label="Enter comma-separated ZIP codes (e.g., 07001,07002,07003)"),
        gr.Textbox(label="Start Date (YYYY-MM-DD) - No earlier than 2000"),
        gr.Textbox(label="End Date (YYYY-MM-DD) - No earlier than 2000")
    ],
    outputs=gr.Plot(),
    title="3D ZIP Code Housing Price Correlation Matrix"
)

# Start the app as soon as the script runs. share=False requests no public
# share link; debug=True is Gradio's debug mode (see Gradio launch() docs).
iface.launch(share=False, debug=True)