# dibend's picture
# Update app.py
# 41c3b0f
# raw
# history blame
# 4.21 kB
import datetime

import gradio as gr
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from pandas.tseries.offsets import MonthEnd
from scipy import stats
from sklearn.linear_model import LinearRegression
def _prediction_interval_half_width(x, y, fitted, future_x, confidence=0.95):
    """Return the half-width of a simple-linear-regression prediction interval.

    Args:
        x: Observed predictor values used to fit the line.
        y: Observed responses, same length as ``x``.
        fitted: Model predictions at ``x``.
        future_x: Predictor values at which new observations are predicted.
        confidence: Two-sided confidence level of the interval.

    Returns:
        A 1-D array with one half-width per entry of ``future_x``.
    """
    # Everything is flattened to 1-D so the result lines up element-wise with
    # a 1-D prediction vector instead of broadcasting to a 2-D matrix.
    x = np.asarray(x, dtype=float).ravel()
    y = np.asarray(y, dtype=float).ravel()
    fitted = np.asarray(fitted, dtype=float).ravel()
    future_x = np.asarray(future_x, dtype=float).ravel()

    n_obs = x.size
    dof = n_obs - 2  # two estimated parameters: slope and intercept
    residual_se = np.sqrt(np.sum((fitted - y) ** 2) / dof)
    t_crit = stats.t.ppf((1 + confidence) / 2, dof)
    x_mean = x.mean()
    sxx = np.sum((x - x_mean) ** 2)
    return t_crit * residual_se * np.sqrt(
        1 + 1 / n_obs + (future_x - x_mean) ** 2 / sxx
    )


def plot_and_predict(zip, start_year, start_month, prediction_months):
    """Plot Zillow home values for a ZIP code with a linear-trend forecast.

    The Zillow ZHVI CSV is downloaded, the row for the ZIP code is selected,
    a straight line is fitted to the prices from the start date onwards, and
    the line is extrapolated ``prediction_months`` months with a 95%
    prediction interval.

    Args:
        zip: Five-digit ZIP code as a string.
        start_year: Four-digit year as a string, between 2000 and the
            current year.
        start_month: Month number (1-12) as a string or integer.
        prediction_months: Number of future months to forecast (>= 1).

    Returns:
        A ``plotly.graph_objects.Figure`` on success, or a string starting
        with ``"Error:"`` when the inputs are invalid or no usable data
        exists.

    NOTE(review): the error strings are returned to a Gradio ``"plot"``
    output; confirm that the UI displays them as intended.
    """
    # Input validation for ZIP code. isascii() guards against characters such
    # as superscript digits, which pass isdigit() but make int() raise.
    if not (zip.isascii() and zip.isdigit()) or len(zip) != 5:
        return "Error: Please enter a valid 5-digit ZIP code."

    # Input validation for start year
    current_year = datetime.datetime.now().year
    year_is_numeric = start_year.isascii() and start_year.isdigit()
    if not year_is_numeric or not (2000 <= int(start_year) <= current_year):
        return f"Error: Please enter a valid year between 2000 and {current_year}."

    # Input validation for start month. TypeError covers an unselected
    # dropdown (None); the range check rejects values such as 0 or 13.
    month_error = "Error: Invalid start month. Please enter a numeric month between 1 and 12."
    try:
        start_month_int = int(start_month)
    except (TypeError, ValueError):
        return month_error
    if not 1 <= start_month_int <= 12:
        return month_error
    start_date = pd.to_datetime(f"{start_year}-{start_month_int:02d}-01")

    # The slider may deliver a float; range() below needs a positive int.
    horizon_error = "Error: Prediction months must be a whole number of at least 1."
    try:
        horizon = int(prediction_months)
    except (TypeError, ValueError):
        return horizon_error
    if horizon < 1:
        return horizon_error

    no_data_error = (
        "Error: No data found for the provided ZIP code or start year/month. "
        "Please check your inputs."
    )

    # Read and process the real estate data from Zillow
    df = pd.read_csv('https://files.zillowstatic.com/research/public_csvs/zhvi/Zip_zhvi_uc_sfrcondo_tier_0.33_0.67_sm_sa_month.csv')
    df = df[df['RegionName'] == int(zip)]
    # An unknown ZIP must be caught here: transposing zero rows yields a
    # single column and the two-name column assignment below would raise.
    if df.empty:
        return no_data_error
    df = df.iloc[[0]].loc[:, '2000-01-31':]
    df = df.T.reset_index()
    df.columns = ['Date', 'Price']
    df['Date'] = pd.to_datetime(df['Date'])
    df['Price'] = pd.to_numeric(df['Price'], errors='coerce')

    # Filter on start date and drop months without a price; the regression
    # cannot be fitted on NaN values.
    df = df[df['Date'] >= start_date].dropna(subset=['Price']).reset_index(drop=True)
    if df.empty:
        return no_data_error
    if len(df) < 3:
        # The residual standard error uses n - 2 degrees of freedom.
        return "Error: Not enough data after the selected start date to fit a trend. Please choose an earlier start year/month."

    # Month offsets are derived from the dates, not the row position, so a
    # dropped (missing) month does not compress the time axis.
    first_date = df['Date'].iloc[0]
    df['MonthsSinceStart'] = (
        (df['Date'].dt.year - first_date.year) * 12
        + (df['Date'].dt.month - first_date.month)
    )

    # Train linear regression model
    X = df['MonthsSinceStart'].to_numpy().reshape(-1, 1)
    y = df['Price'].to_numpy()
    model = LinearRegression()
    model.fit(X, y)

    # Predict future prices
    last_month_index = int(df['MonthsSinceStart'].iloc[-1])
    future_months = np.arange(
        last_month_index + 1, last_month_index + horizon + 1
    ).reshape(-1, 1)
    predicted_prices = model.predict(future_months)

    # 95% prediction interval around the extrapolated line
    half_width = _prediction_interval_half_width(
        X, y, model.predict(X), future_months
    )
    upper_bound = predicted_prices + half_width
    lower_bound = predicted_prices - half_width

    # Prepare data for plotting
    last_date = df['Date'].iloc[-1]
    future_dates = [last_date + MonthEnd(i) for i in range(1, horizon + 1)]
    historical_prices_trace = go.Scatter(
        x=df['Date'],
        y=df['Price'],
        mode="lines",
        name="Historical Prices"
    )
    predicted_prices_trace = go.Scatter(
        x=future_dates,
        y=predicted_prices,
        mode="lines",
        name="Predicted Prices"
    )

    # The figure must exist before traces are added. The band goes in first
    # so the price lines are drawn on top; 'tonexty' on the lower bound fills
    # up to the trace added just before it (the upper bound).
    fig = go.Figure()
    fig.add_traces([
        go.Scatter(x=future_dates, y=upper_bound, mode='lines', name='Upper Bound', line=dict(width=0)),
        go.Scatter(x=future_dates, y=lower_bound, mode='lines', name='Lower Bound', line=dict(width=0), fill='tonexty')
    ])
    fig.add_trace(historical_prices_trace)
    fig.add_trace(predicted_prices_trace)
    fig.update_layout(
        title=f"Real Estate Price Prediction for Zip Code {zip}",
        xaxis_title="Date",
        yaxis_title="Price",
        legend_title_text="Data"
    )
    return fig
# Gradio UI definition: two text boxes, a month dropdown and a slider are
# passed, in this order, to plot_and_predict; the result goes to a plot output.
interface = gr.Interface(
    title="Real Estate Price Predictor",
    description="Enter a ZIP code, start year, start month, and the number of months for price prediction.",
    fn=plot_and_predict,
    inputs=[
        gr.Textbox(placeholder="e.g., 90210", label="ZIP Code"),
        gr.Textbox(placeholder="e.g., 2020", label="Start Year"),
        # Month choices are the strings "1" through "12".
        gr.Dropdown(choices=list(map(str, range(1, 13))), label="Start Month"),
        gr.Slider(label="Prediction Months", minimum=1, maximum=60, step=1),
    ],
    outputs="plot",
)

# Start serving the interface with Gradio's debug flag enabled.
interface.launch(debug=True)