import datetime
import functools

import gradio as gr
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from pandas.tseries.offsets import MonthEnd
from sklearn.linear_model import LinearRegression

# Zillow ZHVI table, one row per ZIP code and one column per month-end date.
ZHVI_CSV_URL = (
    "https://files.zillowstatic.com/research/public_csvs/zhvi/"
    "Zip_zhvi_uc_sfrcondo_tier_0.33_0.67_sm_sa_month.csv"
)
# Label of the first monthly price column; everything to its left is metadata.
FIRST_DATE_COLUMN = "2000-01-31"

NO_DATA_MESSAGE = (
    "Error: No data found for the provided ZIP code or start year/month. "
    "Please check your inputs."
)
INVALID_MONTH_MESSAGE = (
    "Error: Invalid start month. Please enter a numeric month between 1 and 12."
)


@functools.lru_cache(maxsize=1)
def _load_zhvi():
    """Download the Zillow CSV once per process and reuse the parsed table.

    The file is large, so re-downloading it on every request is wasteful.
    A failed download is not cached (exceptions are never memoised), so the
    next request simply retries. Callers must not mutate the returned frame.
    """
    return pd.read_csv(ZHVI_CSV_URL)


def plot_and_predict(zip, start_year, start_month, prediction_months):
    """Plot historical prices for a ZIP code plus a linear-trend forecast.

    Args:
        zip: 5-digit ZIP code as text. (The name shadows the builtin but is
            kept so existing keyword callers continue to work.)
        start_year: Four-digit year as text, between 2000 and the current year.
        start_month: Month number 1-12, as text or int.
        prediction_months: Number of months to forecast (>= 1); floats with
            an integral value, as a slider may produce, are accepted.

    Returns:
        A ``plotly.graph_objects.Figure`` with one trace for the historical
        prices and one for the predicted prices.

    Raises:
        gr.Error: If any input is invalid or no price data is available.
            A message string cannot be rendered by a plot output, so the
            problem is raised for Gradio to display instead of returned.
    """
    # --- Input validation -------------------------------------------------
    zip_code = (zip or "").strip()
    # isdecimal() (unlike isdigit()) only accepts characters int() can parse.
    if not zip_code.isdecimal() or len(zip_code) != 5:
        raise gr.Error("Error: Please enter a valid 5-digit ZIP code.")

    current_year = datetime.datetime.now().year
    year_text = (start_year or "").strip()
    if not year_text.isdecimal() or not (2000 <= int(year_text) <= current_year):
        raise gr.Error(
            f"Error: Please enter a valid year between 2000 and {current_year}."
        )

    try:
        month = int(start_month)
    except (TypeError, ValueError):
        # TypeError covers an unselected dropdown (None).
        raise gr.Error(INVALID_MONTH_MESSAGE) from None
    if not 1 <= month <= 12:
        raise gr.Error(INVALID_MONTH_MESSAGE)

    try:
        horizon = int(prediction_months)
    except (TypeError, ValueError):
        horizon = 0
    if horizon < 1:
        raise gr.Error("Error: Prediction months must be a whole number of at least 1.")

    start_date = pd.Timestamp(year=int(year_text), month=month, day=1)

    # --- Load and reshape the price history -------------------------------
    table = _load_zhvi()
    matches = table[table["RegionName"] == int(zip_code)]
    # Must be checked BEFORE transposing: an empty selection transposes to a
    # single-column frame and the two-name column assignment below would fail.
    if matches.empty:
        raise gr.Error(NO_DATA_MESSAGE)

    history = matches.iloc[[0]].loc[:, FIRST_DATE_COLUMN:].T.reset_index()
    history.columns = ["Date", "Price"]
    history["Date"] = pd.to_datetime(history["Date"])

    # Drop months before the requested start and months with no recorded
    # price (LinearRegression rejects NaN targets).
    history = history[history["Date"] >= start_date].dropna(subset=["Price"])
    if history.empty:
        raise gr.Error(NO_DATA_MESSAGE)

    # --- Fit the linear trend ---------------------------------------------
    # Use real calendar distance from the first kept month so that any gap
    # left by dropped rows does not compress the time axis.
    first_date = history["Date"].iloc[0]
    months_since_start = (
        (history["Date"].dt.year - first_date.year) * 12
        + (history["Date"].dt.month - first_date.month)
    ).to_numpy()

    model = LinearRegression()
    model.fit(months_since_start.reshape(-1, 1), history["Price"].to_numpy())

    # --- Forecast ---------------------------------------------------------
    last_month_index = int(months_since_start[-1])
    future_months = np.arange(
        last_month_index + 1, last_month_index + horizon + 1
    ).reshape(-1, 1)
    predicted_prices = model.predict(future_months)

    last_date = history["Date"].iloc[-1]
    future_dates = [last_date + MonthEnd(i) for i in range(1, horizon + 1)]

    # --- Plot -------------------------------------------------------------
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=history["Date"],
            y=history["Price"],
            mode="lines",
            name="Historical Prices",
        )
    )
    fig.add_trace(
        go.Scatter(
            x=future_dates,
            y=predicted_prices,
            mode="lines",
            name="Predicted Prices",
        )
    )
    fig.update_layout(
        title=f"Real Estate Price Prediction for Zip Code {zip_code}",
        xaxis_title="Date",
        yaxis_title="Price",
        legend_title_text="Data",
    )
    return fig


# Gradio interface with updated inputs
interface = gr.Interface(
    fn=plot_and_predict,
    inputs=[
        gr.Textbox(label="ZIP Code", placeholder="e.g., 90210"),
        gr.Textbox(label="Start Year", placeholder="e.g., 2020"),
        gr.Dropdown(label="Start Month", choices=[str(i) for i in range(1, 13)]),
        gr.Slider(minimum=1, maximum=60, step=1, label="Prediction Months"),
    ],
    outputs="plot",
    title="Real Estate Price Predictor",
    description=(
        "Enter a ZIP code, start year, start month, and the number of months "
        "for price prediction."
    ),
)

# Launch the app only when run as a script, so importing this module (e.g. to
# reuse plot_and_predict) does not start a web server.
if __name__ == "__main__":
    interface.launch(debug=True)