import datetime

import gradio as gr
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from pandas.tseries.offsets import MonthEnd
from scipy import stats
from sklearn.linear_model import LinearRegression
|
|
|
def plot_and_predict(zip, start_year, start_month, prediction_months):
    """Plot Zillow home values for a ZIP code together with a linear forecast.

    Downloads the Zillow ZHVI per-ZIP CSV, keeps the monthly values for the
    requested ZIP code from the given start month onward, fits a straight
    line (price against months since the start date) and extrapolates it
    ``prediction_months`` months ahead with a 95% prediction band.

    Args:
        zip: Five-digit ZIP code as text. The parameter name shadows the
            builtin but is kept so existing callers keep working.
        start_year: Year as text, between 2000 and the current year.
        start_month: Month number (1-12) as text or number.
        prediction_months: Number of months to forecast (at least 1).

    Returns:
        A ``plotly.graph_objects.Figure`` on success, otherwise a string
        starting with ``"Error:"`` that describes the invalid input.

    NOTE(review): the Interface in this file declares ``outputs="plot"``;
    confirm Gradio displays the error strings acceptably, or switch these
    returns to ``raise gr.Error(...)``.
    """
    no_data_error = (
        "Error: No data found for the provided ZIP code or start year/month. "
        "Please check your inputs."
    )
    month_error = (
        "Error: Invalid start month. Please enter a numeric month between 1 and 12."
    )

    # --- Input validation (all checks run before any network access) ---
    # isascii() guards against Unicode digits that isdigit() accepts but
    # int() cannot parse.
    zip_code = "" if zip is None else str(zip).strip()
    if not (zip_code.isascii() and zip_code.isdigit()) or len(zip_code) != 5:
        return "Error: Please enter a valid 5-digit ZIP code."

    current_year = datetime.datetime.now().year
    year_text = "" if start_year is None else str(start_year).strip()
    if (
        not (year_text.isascii() and year_text.isdigit())
        or not 2000 <= int(year_text) <= current_year
    ):
        return f"Error: Please enter a valid year between 2000 and {current_year}."

    # An unset dropdown yields None, which raises TypeError rather than
    # ValueError, so both are handled; the range is checked explicitly.
    try:
        month = int(start_month)
    except (TypeError, ValueError):
        return month_error
    if not 1 <= month <= 12:
        return month_error

    # The forecast horizon may arrive as a float; range()/arange need an int.
    try:
        horizon = int(prediction_months)
    except (TypeError, ValueError):
        return "Error: Prediction months must be a whole number of at least 1."
    if horizon < 1:
        return "Error: Prediction months must be a whole number of at least 1."

    start_date = pd.Timestamp(year=int(year_text), month=month, day=1)

    # --- Load and reshape the price history for this ZIP code ---
    df = pd.read_csv(
        'https://files.zillowstatic.com/research/public_csvs/zhvi/'
        'Zip_zhvi_uc_sfrcondo_tier_0.33_0.67_sm_sa_month.csv'
    )
    df = df[df['RegionName'] == int(zip_code)]
    # Bail out before the transpose: with no matching row the transposed
    # frame has a single column and the two-name assignment below would raise.
    if df.empty:
        return no_data_error

    df = df.loc[:, '2000-01-31':]
    df = df.T.reset_index()
    df.columns = ['Date', 'Price']
    df['Date'] = pd.to_datetime(df['Date'])

    df = df[df['Date'] >= start_date].copy()
    # Number the months before dropping gaps so missing months keep their
    # position on the time axis.
    df['MonthsSinceStart'] = np.arange(len(df))
    df = df.dropna(subset=['Price'])

    if df.empty:
        return no_data_error
    if len(df) < 3:
        # The residual standard error divides by n - 2.
        return (
            "Error: Not enough data after the selected start date to fit a trend. "
            "Please choose an earlier start date."
        )

    # --- Fit the linear trend and extrapolate ---
    X = df['MonthsSinceStart'].to_numpy().reshape(-1, 1)
    y = df['Price'].to_numpy(dtype=float)
    model = LinearRegression()
    model.fit(X, y)

    last_month_index = X[-1, 0]
    future_months = (last_month_index + np.arange(1, horizon + 1)).reshape(-1, 1)
    predicted_prices = model.predict(future_months)

    # --- 95% prediction interval for simple linear regression ---
    n_obs = len(y)
    dof = n_obs - 2
    residuals = model.predict(X) - y
    se = np.sqrt(np.sum(residuals ** 2) / dof)
    t_crit = stats.t.ppf(0.975, dof)
    x_mean = X.mean()
    sxx = np.sum((X - x_mean) ** 2)
    # ravel() keeps the interval 1-D; adding an (n, 1) column to the (n,)
    # predictions would broadcast to an (n, n) matrix.
    prediction_interval = t_crit * se * np.sqrt(
        1 + 1 / n_obs + (future_months.ravel() - x_mean) ** 2 / sxx
    )
    upper_bound = predicted_prices + prediction_interval
    lower_bound = predicted_prices - prediction_interval

    # --- Build the figure ---
    last_date = df['Date'].iloc[-1]
    future_dates = [last_date + MonthEnd(i) for i in range(1, horizon + 1)]

    fig = go.Figure()
    # The band goes in first so the price lines are drawn on top of it.
    # 'tonexty' fills towards the trace added immediately before, so the
    # upper bound must directly precede the lower bound.
    fig.add_traces([
        go.Scatter(
            x=future_dates, y=upper_bound, mode='lines',
            name='Upper Bound', line=dict(width=0),
        ),
        go.Scatter(
            x=future_dates, y=lower_bound, mode='lines',
            name='Lower Bound', line=dict(width=0), fill='tonexty',
        ),
    ])
    fig.add_trace(go.Scatter(
        x=df['Date'],
        y=df['Price'],
        mode="lines",
        name="Historical Prices",
    ))
    fig.add_trace(go.Scatter(
        x=future_dates,
        y=predicted_prices,
        mode="lines",
        name="Predicted Prices",
    ))
    fig.update_layout(
        title=f"Real Estate Price Prediction for Zip Code {zip_code}",
        xaxis_title="Date",
        yaxis_title="Price",
        legend_title_text="Data",
    )

    return fig
|
|
|
|
|
# Gradio front end: four input widgets feed plot_and_predict, and its return
# value is shown in a single plot output.
interface = gr.Interface(
    title="Real Estate Price Predictor",
    description="Enter a ZIP code, start year, start month, and the number of months for price prediction.",
    fn=plot_and_predict,
    inputs=[
        gr.Textbox(placeholder="e.g., 90210", label="ZIP Code"),
        gr.Textbox(placeholder="e.g., 2020", label="Start Year"),
        # Month choices are the strings "1" through "12".
        gr.Dropdown(choices=list(map(str, range(1, 13))), label="Start Month"),
        gr.Slider(label="Prediction Months", minimum=1, maximum=60, step=1),
    ],
    outputs="plot",
)

# Start the app with Gradio's debug flag enabled.
interface.launch(debug=True)