File size: 3,089 Bytes
475dc77
 
 
 
c8d21d7
 
 
 
 
475dc77
caffa37
 
 
 
 
 
 
 
89af670
475dc77
89af670
 
475dc77
 
 
c8d21d7
caffa37
 
 
475dc77
89af670
475dc77
89af670
475dc77
c8d21d7
475dc77
 
89af670
475dc77
 
c8d21d7
475dc77
c8d21d7
475dc77
 
89af670
475dc77
 
 
89af670
c8d21d7
475dc77
 
 
 
 
 
caffa37
89af670
caffa37
 
 
475dc77
c8d21d7
 
 
 
 
 
 
 
 
475dc77
 
caffa37
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import gradio as gr
import pandas as pd
import plotly.graph_objects as go
import numpy as np
from datetime import datetime

# Peek at the Zillow ZHVI CSV: a single data row is requested because only the
# header is needed here. The right-most column label is kept as the
# "latest data" string that the UI description displays.
df_sample = pd.read_csv(
    filepath_or_buffer='https://files.zillowstatic.com/research/public_csvs/zhvi/Zip_zhvi_uc_sfrcondo_tier_0.33_0.67_sm_sa_month.csv',
    nrows=1,
)
latest_data_date_str = df_sample.columns[-1]

def plot_zip_code_correlation(zip_codes_str, start_date, end_date):
    """Plot the pairwise correlation of ZHVI prices between ZIP codes as a 3D surface.

    All cheap input validation happens before the CSV is downloaded, so bad
    input fails fast instead of after a large network transfer.

    Args:
        zip_codes_str: Comma-separated ZIP codes. Entries are stripped,
            left-padded with zeros to five characters, and de-duplicated
            (first occurrence wins); empty entries are ignored.
        start_date: First date of the window (inclusive), anything
            ``pd.to_datetime`` can parse, e.g. ``"2015-01-01"``.
        end_date: Last date of the window (inclusive), same format.

    Returns:
        A ``plotly.graph_objects.Figure`` holding one ``Surface`` trace whose
        z-values are the correlation matrix of the selected ZIP codes.

    Raises:
        ValueError: If a date is missing, either year is before 2000, the
            start date is after the end date, fewer than two distinct ZIP
            codes are given, no rows or date columns match, or fewer than two
            ZIP codes / two common dates remain to correlate.
    """
    # --- Validate dates on full timestamps, not just on the year ------------
    start_ts = pd.to_datetime(start_date)
    end_ts = pd.to_datetime(end_date)
    if pd.isna(start_ts) or pd.isna(end_ts):
        # A blank/None input parses to NaT/None and would otherwise slip
        # through the year checks below.
        raise ValueError("Please enter both a start date and an end date (YYYY-MM-DD).")
    if start_ts.year < 2000 or end_ts.year < 2000:
        raise ValueError("Please select dates no earlier than the year 2000.")
    if start_ts > end_ts:
        raise ValueError("Start date must be before end date.")

    # --- Normalise the ZIP list ---------------------------------------------
    # dict.fromkeys de-duplicates while preserving the order the user typed;
    # duplicates would otherwise create duplicate columns in the matrix.
    stripped = (token.strip() for token in (zip_codes_str or "").split(","))
    zip_codes = list(dict.fromkeys(token.zfill(5) for token in stripped if token))
    if len(zip_codes) < 2:
        raise ValueError("Please enter at least two distinct ZIP codes.")

    # --- Load and filter the Zillow table -----------------------------------
    df = pd.read_csv('https://files.zillowstatic.com/research/public_csvs/zhvi/Zip_zhvi_uc_sfrcondo_tier_0.33_0.67_sm_sa_month.csv')
    df['RegionName'] = df['RegionName'].astype(str).str.zfill(5)
    df = df[df['RegionName'].isin(zip_codes)]
    if df.empty:
        raise ValueError("No data found for the provided ZIP codes.")

    # --- Pick the date columns inside the window ----------------------------
    # Column labels are parsed as dates instead of being compared as strings,
    # so input such as "2020-1-5" still selects the right columns. Metadata
    # columns do not match the format, become NaT, and compare False, which
    # removes the need for a hard-coded column offset.
    parsed_labels = pd.to_datetime(df.columns, format="%Y-%m-%d", errors="coerce")
    in_window = (parsed_labels >= start_ts) & (parsed_labels <= end_ts)
    date_columns = list(df.columns[in_window])
    if not date_columns:
        raise ValueError("No data available within the selected date range.")

    # --- Build the ZIP x date price table -----------------------------------
    # One row per ZIP (first row wins if the file ever repeats a ZIP), in the
    # order the user supplied; ZIPs with no prices at all in the window are
    # dropped.
    prices = df.drop_duplicates(subset='RegionName').set_index('RegionName')[date_columns]
    prices = prices.reindex([z for z in zip_codes if z in prices.index])
    prices = prices.dropna(how='all')
    valid_zip_list = prices.index.tolist()
    if len(valid_zip_list) < 2:
        raise ValueError("Not enough data for correlation calculation.")

    # Dates as rows, ZIPs as columns; keep only dates where every ZIP has a value.
    price_matrix_df = prices.T.dropna()
    if len(price_matrix_df) < 2:
        # A correlation needs at least two common observations; with fewer the
        # matrix would be all-NaN and the surface meaningless.
        raise ValueError("Not enough data for correlation calculation.")

    corr_matrix = price_matrix_df.corr()
    z_data = corr_matrix.values
    x_data, y_data = np.meshgrid(valid_zip_list, valid_zip_list)

    fig = go.Figure(data=[go.Surface(z=z_data, x=x_data, y=y_data)])
    fig.update_layout(
        title=f'3D Correlation Matrix of Housing Prices ({start_date} to {end_date})',
        scene=dict(xaxis_title='ZIP Code', yaxis_title='ZIP Code', zaxis_title='Correlation'),
        autosize=True
    )
    return fig

# Markdown rendered under the title. The f-string embeds the newest date
# column label read from the CSV at start-up. The two trailing spaces after
# "buyer." are intentional (Markdown hard line break) and must be kept.
_description_md = f"""
## US Real Estate Zip ZHVI Price Movement Correlation Matrix Gen

Track housing price correlations by ZIP code to make informed decisions as a property owner or buyer.  
**Data up to {latest_data_date_str}**. Enter ZIP codes below.

[Contact a real estate broker](https://micheled.com)
"""

# One text box per positional argument of plot_zip_code_correlation, in the
# same order as its signature: ZIP codes, start date, end date.
_input_boxes = [
    gr.Textbox(label="Enter comma-separated ZIP codes (e.g., 07001,07002,07003)"),
    gr.Textbox(label="Start Date (YYYY-MM-DD) - No earlier than 2000"),
    gr.Textbox(label="End Date (YYYY-MM-DD) - No earlier than 2000"),
]

# Wire the plotting function to the inputs and a single Plot output.
iface = gr.Interface(
    fn=plot_zip_code_correlation,
    inputs=_input_boxes,
    outputs=gr.Plot(),
    title="3D ZIP Code Housing Price Correlation Matrix",
    description=_description_md,
)

# Start the app as soon as the module runs (sharing off, debug mode on).
iface.launch(share=False, debug=True)