File size: 4,214 Bytes
b5c06f5
 
 
 
49525d0
 
bf8aa5a
b5c06f5
bf8aa5a
d9b8500
 
 
 
 
 
 
 
 
5dada9c
 
 
 
 
 
 
d9b8500
 
b5c06f5
 
 
 
 
 
 
 
d9b8500
ae905a3
 
d9b8500
 
 
 
b5c06f5
49525d0
 
b5c06f5
 
 
 
 
49525d0
 
 
b5c06f5
41c3b0f
 
 
 
 
 
 
 
b5c06f5
 
 
 
 
 
 
49525d0
b5c06f5
 
 
 
 
 
 
41c3b0f
 
 
 
 
 
b5c06f5
 
 
 
 
 
 
 
 
 
 
 
 
d9b8500
b5c06f5
 
 
d9b8500
 
263a565
9d62ef5
b5c06f5
bf8aa5a
 
 
b5c06f5
 
 
bf8aa5a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import datetime

import gradio as gr
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from pandas.tseries.offsets import MonthEnd
from scipy import stats
from sklearn.linear_model import LinearRegression

def plot_and_predict(zip, start_year, start_month, prediction_months):
    """Plot historical home values for a ZIP code with a linear-trend forecast.

    Downloads the Zillow ZHVI per-ZIP CSV, keeps the monthly values for the
    requested ZIP from the start date onward, fits a straight line against
    the month index, and extrapolates it ``prediction_months`` ahead with a
    95% prediction interval.

    Args:
        zip: Five-digit ZIP code as a string. (The name shadows the builtin
            but is kept so keyword callers are not broken.)
        start_year: Four-digit year as a string, between 2000 and the
            current year.
        start_month: Month number 1-12, as a string or an int.
        prediction_months: Number of months to forecast; must convert to an
            integer >= 1.

    Returns:
        A ``plotly.graph_objects.Figure`` on success, otherwise a string
        beginning with ``"Error:"`` describing the invalid input.
        NOTE(review): the caller declares a plot output; confirm how the UI
        renders these string results.
    """
    no_data_message = (
        "Error: No data found for the provided ZIP code or start year/month. "
        "Please check your inputs."
    )
    bad_month_message = (
        "Error: Invalid start month. Please enter a numeric month between 1 and 12."
    )

    # Input validation for ZIP code
    if not zip.isdigit() or len(zip) != 5:
        return "Error: Please enter a valid 5-digit ZIP code."

    # Input validation for start year
    current_year = datetime.datetime.now().year
    if not start_year.isdigit() or not (2000 <= int(start_year) <= current_year):
        return f"Error: Please enter a valid year between 2000 and {current_year}."

    # Input validation for start month. TypeError covers a missing (None)
    # selection, which int() rejects with TypeError rather than ValueError.
    try:
        start_month_int = int(start_month)
    except (TypeError, ValueError):
        return bad_month_message
    if not 1 <= start_month_int <= 12:
        return bad_month_message

    # Input validation for the forecast horizon. range() below needs an int,
    # so coerce here in case the value arrives as a float such as 12.0.
    try:
        horizon = int(prediction_months)
    except (TypeError, ValueError):
        return "Error: Prediction months must be a whole number of at least 1."
    if horizon < 1:
        return "Error: Prediction months must be a whole number of at least 1."

    start_date = pd.Timestamp(year=int(start_year), month=start_month_int, day=1)

    # Read and process the real estate data from Zillow
    df = pd.read_csv('https://files.zillowstatic.com/research/public_csvs/zhvi/Zip_zhvi_uc_sfrcondo_tier_0.33_0.67_sm_sa_month.csv')
    matches = df[df['RegionName'] == int(zip)]

    # An unknown ZIP must be caught here: transposing a zero-row frame does
    # not produce the two columns the reshaping below relies on.
    if matches.empty:
        return no_data_message

    # Select the date columns first so the row is purely numeric, then take
    # the (first) matching row as a date-indexed series of prices.
    prices = matches.loc[:, '2000-01-31':].iloc[0]
    df = pd.DataFrame({
        'Date': pd.to_datetime(prices.index),
        'Price': prices.to_numpy(dtype=float),
    })

    # Filter data based on start date and drop months without a value,
    # because the regression cannot be fitted on NaN targets.
    df = df[df['Date'] >= start_date].dropna(subset=['Price'])

    # Check for empty dataframe
    if df.empty:
        return no_data_message

    # The residual standard error uses n - 2 degrees of freedom, so at least
    # three observations are required for a finite prediction interval.
    n = len(df)
    if n < 3:
        return "Error: Not enough data after the start date to fit a trend. Please choose an earlier start."

    # Train linear regression model
    months = np.arange(n)
    df['MonthsSinceStart'] = months
    X = months.reshape(-1, 1)
    y = df['Price'].to_numpy()
    model = LinearRegression()
    model.fit(X, y)

    # Predict future prices for the month indices following the last sample
    future_months = np.arange(n, n + horizon)
    predicted_prices = model.predict(future_months.reshape(-1, 1))

    # Calculate standard error and prediction intervals. Everything is kept
    # 1-D so the interval has the same shape as predicted_prices; mixing a
    # (k, 1) column with a (k,) vector would broadcast to a (k, k) matrix.
    residuals = y - model.predict(X)
    se = np.sqrt(np.sum(residuals ** 2) / (n - 2))
    t = stats.t.ppf(0.975, n - 2)  # 95% prediction interval
    x_mean = months.mean()
    sxx = np.sum((months - x_mean) ** 2)
    prediction_interval = t * se * np.sqrt(
        1 + 1 / n + (future_months - x_mean) ** 2 / sxx
    )

    upper_bound = predicted_prices + prediction_interval
    lower_bound = predicted_prices - prediction_interval

    # Prepare data for plotting
    historical_prices_trace = go.Scatter(
        x=df['Date'],
        y=df['Price'],
        mode="lines",
        name="Historical Prices"
    )
    future_dates = [df['Date'].iloc[-1] + MonthEnd(i) for i in range(1, horizon + 1)]
    predicted_prices_trace = go.Scatter(
        x=future_dates,
        y=predicted_prices,
        mode="lines",
        name="Predicted Prices"
    )

    # The figure must exist before any trace is added to it.
    fig = go.Figure()

    # Plot prediction intervals. The lower bound directly follows the upper
    # bound so that fill='tonexty' shades the band between the two.
    fig.add_traces([
        go.Scatter(x=future_dates, y=upper_bound, mode='lines', name='Upper Bound', line=dict(width=0)),
        go.Scatter(x=future_dates, y=lower_bound, mode='lines', name='Lower Bound', line=dict(width=0), fill='tonexty')
    ])

    # Plot data
    fig.add_trace(historical_prices_trace)
    fig.add_trace(predicted_prices_trace)
    fig.update_layout(
        title=f"Real Estate Price Prediction for Zip Code {zip}",
        xaxis_title="Date",
        yaxis_title="Price",
        legend_title_text="Data"
    )

    return fig

# Input widgets, declared in the positional order plot_and_predict takes them:
# ZIP code, start year, start month, number of months to predict.
zip_code_input = gr.Textbox(label="ZIP Code", placeholder="e.g., 90210")
start_year_input = gr.Textbox(label="Start Year", placeholder="e.g., 2020")
start_month_input = gr.Dropdown(
    label="Start Month",
    choices=list(map(str, range(1, 13))),  # month numbers "1".."12" as strings
)
prediction_months_input = gr.Slider(
    minimum=1,
    maximum=60,
    step=1,
    label="Prediction Months",
)

# Gradio interface wiring the widgets to the prediction function; the result
# is shown in a plot output.
interface = gr.Interface(
    fn=plot_and_predict,
    inputs=[
        zip_code_input,
        start_year_input,
        start_month_input,
        prediction_months_input,
    ],
    outputs="plot",
    title="Real Estate Price Predictor",
    description="Enter a ZIP code, start year, start month, and the number of months for price prediction.",
)

# Launch the app with debug mode enabled
interface.launch(debug=True)