Spaces:
Runtime error
Runtime error
import numpy as np | |
import pandas as pd | |
import geopandas as gpd | |
from shapely.geometry import Point, LineString | |
import pickle | |
import folium | |
import requests | |
import streamlit as st | |
import seaborn as sns | |
from streamlit_folium import st_folium | |
import matplotlib.pyplot as plt | |
from datetime import datetime, timedelta | |
import gspread | |
from oauth2client.service_account import ServiceAccountCredentials | |
from utils import * | |
########## Title for the Web App ########## | |
st.title("Property Price Predictor") | |
st.markdown('_Creator: GOH Hong Aik [[LinkedIn]](https://www.linkedin.com/in/hongaikgoh/)_') | |
st.markdown("""This app predicts your house price based on a few indicators, and displays amenities within 2 km. Please note the following: | |
- Only property types allowed are HDB, Condominium, Executive Condominium and Apartment. | |
- Model was trained on **resale, leasehold** properties from 2021 onwards to account for COVID-19 effects; predictions for new and/or freehold properties will not be accurate. | |
- Geospatial features are obtained from OneMap API with postal code. Newer postal codes may not be available (eg. property under construction) | |
""") | |
testing_postal = st.text_input('Test your postal code here') | |
if st.button('Click to retrieve data!'): | |
result = test_postal(testing_postal) | |
if result == 'INVALID LOCATION': | |
st.error('Invalid postal code, you may attempt to search for the name or address instead.') | |
else: | |
st.success('SEARCHVAL: ' + result[0] + ' ADDRESS: ' + result[1] + ' LATITUDE: ' + str(result[2]) + ' LONGITUDE: ' + str(result[3])) | |
st.header('Predict your property price here!') | |
with st.form("inputs"): | |
input_postal = st.text_input('Enter your postal code here e.g. 085301') | |
input_storey = st.text_input('Enter your property storey here e.g. 40') | |
input_age_asof_transaction = st.text_input('Enter the age in years of your property here e.g. 14') | |
input_floor_area_sqft = st.text_input('Enter the floor area in square feet of your property here e.g. 1022') | |
input_propertyType = st.selectbox( | |
'Enter your property type here', | |
('HDB', 'Condominium', 'Executive Condominium', 'Apartment')) | |
propertyType_Apartment = 0 | |
propertyType_Condominium = 0 | |
propertyType_Executive_Condominium = 0 | |
propertyType_HDB = 0 | |
submitted = st.form_submit_button("Predict!") | |
if submitted: | |
# validate postal | |
if len(input_postal) == 6: | |
try: | |
validate = int(input_postal) | |
except: | |
st.error('Invalid postal code, please enter a 6 digit integer.') | |
st.stop() | |
else: | |
st.error('Invalid postal code, please enter a 6 digit integer.') | |
st.stop() | |
# validate storey | |
try: | |
input_storey = int(input_storey) | |
except: | |
st.error('Invalid storey, please enter an integer.') | |
st.stop() | |
# validate age | |
try: | |
input_age_asof_transaction = int(input_age_asof_transaction) | |
except: | |
st.error('Invalid age, please enter an integer.') | |
st.stop() | |
# validate floor area | |
try: | |
input_floor_area_sqft = float(input_floor_area_sqft) | |
except: | |
st.error('Invalid floor area, please enter a number.') | |
st.stop() | |
try: | |
lat, long = getSVY21(input_postal) | |
except ValueError: | |
st.error('This postal code does not exist. Please use the widget above to test if the postal code exists.') | |
st.stop() | |
latlong_df = pd.DataFrame({'lat': [lat], 'long': [long]}) | |
latlong_geo = gpd.GeoDataFrame(latlong_df, crs="EPSG:4326", | |
geometry=[Point(xy) for xy in zip(latlong_df['long'], latlong_df['lat'])]) | |
latlong_geo.to_crs(epsg=3857, inplace=True) | |
# Filter geosubset to only properties (sold since 2017) | |
try: | |
geosubset = geosubset[(geosubset['storey'] <= input_storey + 5) & (geosubset['storey'] >= input_storey - 5)] # filter storey +- 5 | |
geosubset = geosubset[(geosubset['age_asof_t'] <= input_age_asof_transaction + 3) & (geosubset['age_asof_t'] >= input_age_asof_transaction - 3)] # filter age +- 3 | |
geosubset = geosubset[(geosubset['floor_area'] <= input_floor_area_sqft + 100) & (geosubset['floor_area'] >= input_floor_area_sqft - 100)] # filter floor area to +- 50m | |
geosubset = geosubset[geosubset['propertyTy'] == input_propertyType] # filter property type | |
geosubset['distance_to_property'] = geosubset.geometry.apply(lambda x: latlong_geo.distance(x)) # filter property radius within 1km | |
geosubset = geosubset[geosubset['distance_to_property'] <= 1000] # filter distances within 1km | |
except: | |
pass | |
if input_propertyType == 'Apartment': | |
propertyType_Apartment = 1 | |
elif input_propertyType == 'Condominium': | |
propertyType_Condominium = 1 | |
elif input_propertyType == 'Executive Condominium': | |
propertyType_Executive_Condominium = 1 | |
elif input_propertyType == 'HDB': | |
propertyType_HDB = 1 | |
latlong_geo['district'] = postal_final.loc[postal_final['2dpostal'] == input_postal[:2], 'district'].item() # all districts are covered in train set | |
latlong_geo['storey'] = input_storey | |
latlong_geo['age_asof_transaction'] = input_age_asof_transaction | |
latlong_geo['mindist_expway'] = latlong_geo.geometry.apply(lambda x: expressway.distance(x).min()) | |
latlong_geo['mindist_mrt'] = latlong_geo.geometry.apply(lambda x: geo_mrt.distance(x).min()) | |
latlong_geo['mindist_mall'] = latlong_geo.geometry.apply(lambda x: geo_mall.distance(x).min()) | |
latlong_geo['mindist_prisch'] = latlong_geo.geometry.apply(lambda x: geo_pri.distance(x).min()) | |
latlong_geo['n_prisch_2km'] = latlong_geo.geometry.apply(lambda x: (geo_pri.distance(x) < 2000).sum()) | |
latlong_geo['mindist_secsch'] = latlong_geo.geometry.apply(lambda x: geo_sec.distance(x).min()) | |
latlong_geo['propertyType_Apartment'] = propertyType_Apartment | |
latlong_geo['propertyType_Condominium'] = propertyType_Condominium | |
latlong_geo['propertyType_Executive_Condominium'] = propertyType_Executive_Condominium | |
latlong_geo['propertyType_HDB'] = propertyType_HDB | |
# Do Predictions, Convert to total price with CI (different CI for different property type) | |
latlong_geo_nolatlong = latlong_geo.drop(columns=['lat', 'long', 'geometry']) | |
latlong_geo_nolatlong_encoded = enc.transform(latlong_geo_nolatlong) | |
prediction_psf = xgb.predict(latlong_geo_nolatlong_encoded).item() | |
prediction_price = round(prediction_psf * input_floor_area_sqft) | |
if latlong_geo_nolatlong_encoded['propertyType_Apartment'].item() == 1: | |
st.success(f'The predicted price of your property is \\${prediction_price:,}, with 95% confidence interval at \\${int(prediction_price - 2*sd_apt*input_floor_area_sqft):,} - \\${int(prediction_price + 2*sd_apt*input_floor_area_sqft):,}.') | |
elif latlong_geo_nolatlong_encoded['propertyType_Condominium'].item() == 1: | |
st.success(f'The predicted price of your property is \\${prediction_price:,}, with 95% confidence interval at \\${int(prediction_price - 2*sd_condo*input_floor_area_sqft):,} - \\${int(prediction_price + 2*sd_condo*input_floor_area_sqft):,}.') | |
elif latlong_geo_nolatlong_encoded['propertyType_Executive_Condominium'].item() == 1: | |
st.success(f'The predicted price of your property is \\${prediction_price:,}, with 95% confidence interval at \\${int(prediction_price - 2*sd_ec*input_floor_area_sqft):,} - \\${int(prediction_price + 2*sd_ec*input_floor_area_sqft):,}.') | |
elif latlong_geo_nolatlong_encoded['propertyType_HDB'].item() == 1: | |
st.success(f'The predicted price of your property is \\${prediction_price:,}, with 95% confidence interval at \\${int(prediction_price - 2*sd_hdb*input_floor_area_sqft):,} - \\${int(prediction_price + 2*sd_hdb*input_floor_area_sqft):,}.') | |
# between Jan 2017 and Oct 2022, propertyType, age, floor area range, storey | |
# use geosubset to have boxplot and folium map | |
if len(geosubset) > 0: | |
st.markdown('There are ' + str(len(geosubset)) + ' resale ' + input_propertyType + 's transacted between Jan 2017 and Oct 2022 in your area which are similar to yours (drag the markers around as they might be overlapping):') | |
map = folium.Map(location=[1.290270, 103.851959], zoom_start=11) | |
folium.Circle( | |
location=[latlong_geo['lat'].item(), latlong_geo['long'].item()], | |
tooltip='Your Home', | |
popup='Your Home', | |
color='crimson', | |
radius=10 | |
).add_to(map) | |
for index, row in geosubset.iterrows(): | |
folium.Marker( | |
[row['lat'], row['long']], | |
tooltip=f'Resale date: {row["transactDa"].strftime("%b %Y")} <br>\n' | |
f'Storey: {int(row["storey"])} <br>\n' | |
f'Age: {int(row["age_asof_t"])} <br>\n' | |
f'Area: {int(row["floor_area"])}sqft <br>\n' | |
f'Resale Price: ${int(row["resale_pri"]):,}', | |
icon=folium.Icon(color="blue", icon='usd', prefix='fa'), | |
opacity=0.8, | |
draggable=True, | |
popup=f'Resale date: {row["transactDa"].strftime("%b %Y")} <br>\n' | |
f'Storey: {int(row["storey"])} <br>\n' | |
f'Age: {int(row["age_asof_t"])} <br>\n' | |
f'Area: {int(row["floor_area"])}sqft <br>\n' | |
f'Resale Price: ${int(row["resale_pri"]):,}' | |
).add_to(map) | |
st_folium(map, width=700, height=450) | |
fig, ax = plt.subplots() | |
sns.swarmplot(x=['']*len(geosubset), y=geosubset['resale_pri'], orient='v', ax=ax, hue=geosubset['transactDa'].dt.year, alpha=0.75) | |
plt.scatter(0, prediction_price, color='black') | |
plt.legend(loc='upper left') | |
plt.text(0.05, prediction_price, 'Your predicted property price') | |
plt.ylabel('Resale Price ($)') | |
plt.title('Swarmplot of nearby similar properties') | |
st.pyplot(fig) | |
else: | |
st.markdown('Oops! There are no resale ' + input_propertyType + 's transacted between Jan 2017 and Oct 2022 in your area which are similar to yours :(') | |
# Get folium visualisations | |
temp_mrt = latlong_geo.geometry.apply(lambda x: geo_mrt.distance(x)).melt().drop(columns=['variable']) | |
temp_mrt = temp_mrt[temp_mrt['value'] <= 2000] | |
temp_mrt = temp_mrt.merge(geo_mrt[['STN_NAME', 'lat','long']], how='left', left_index=True, right_index=True) | |
temp_mrt['category'] = 'MRT' | |
temp_mrt.columns = ['distance', 'name', 'lat', 'long', 'category'] | |
temp_mall = latlong_geo.geometry.apply(lambda x: geo_mall.distance(x)).melt().drop(columns=['variable']) | |
temp_mall = temp_mall[temp_mall['value'] <= 2000] | |
temp_mall = temp_mall.merge(geo_mall[['mall', 'lat','long']], how='left', left_index=True, right_index=True) | |
temp_mall['category'] = 'Mall' | |
temp_mall.columns = ['distance', 'name', 'lat', 'long', 'category'] | |
temp_pri = latlong_geo.geometry.apply(lambda x: geo_pri.distance(x)).melt().drop(columns=['variable']) | |
temp_pri = temp_pri[temp_pri['value'] <= 2000] | |
temp_pri = temp_pri.merge(geo_pri[['Name', 'lat','long']], how='left', left_index=True, right_index=True) | |
temp_pri['category'] = 'Primary School' | |
temp_pri.columns = ['distance', 'name', 'lat', 'long', 'category'] | |
temp_sec = latlong_geo.geometry.apply(lambda x: geo_sec.distance(x)).melt().drop(columns=['variable']) | |
temp_sec = temp_sec[temp_sec['value'] <= 2000] | |
temp_sec = temp_sec.merge(geo_sec[['Name', 'lat','long']], how='left', left_index=True, right_index=True) | |
temp_sec['category'] = 'Secondary School' | |
temp_sec.columns = ['distance', 'name', 'lat', 'long', 'category'] | |
temp_results = pd.concat([temp_mrt, temp_mall, temp_pri, temp_sec]).reset_index(drop=True).sort_values(['category', 'distance']) | |
st.markdown('**Here are the nearest amenities to your property:**') | |
for index, row in temp_results.groupby('category')[['category', 'distance', 'name']].head(2).iterrows(): | |
if row['category'] == 'MRT': | |
st.markdown(row["name"] + ' :metro:: ' + str(int(row["distance"])) + 'm') | |
elif row['category'] == 'Mall': | |
st.markdown(row["name"] + ' :shopping_trolley:: ' + str(int(row["distance"])) + 'm') | |
elif row['category'] == 'Primary School': | |
st.markdown(row["name"] + ' :school:: ' + str(int(row["distance"])) + 'm') | |
elif row['category'] == 'Secondary School': | |
st.markdown(row["name"] + ' :mortar_board:: ' + str(int(row["distance"])) + 'm') | |
m = folium.Map(location=[1.290270, 103.851959], zoom_start=11) | |
folium.Marker( | |
[latlong_geo['lat'].item(), latlong_geo['long'].item()], | |
tooltip='Your Home', | |
popup='Your Home', | |
icon=folium.Icon(color="orange", icon='home', prefix='fa') | |
).add_to(m) | |
folium.Circle( | |
radius=2000, | |
location=[latlong_geo['lat'].item(), latlong_geo['long'].item()], | |
color="black", | |
fill=False, | |
weight=1 | |
).add_to(m) | |
for index, row in temp_results.iterrows(): | |
if row['category'] == 'MRT': | |
folium.Marker( | |
[row['lat'], row['long']], | |
tooltip=f'{row["name"]} <br>Distance: {int(row["distance"])}m</br>', | |
icon=folium.Icon(color="red", icon='train', prefix='fa'), | |
popup=f'{row["name"]} <br>Distance: {int(row["distance"])}m</br>' | |
).add_to(m) | |
elif row['category'] == 'Mall': | |
folium.Marker( | |
[row['lat'], row['long']], | |
tooltip=f'{row["name"]} <br>Distance: {int(row["distance"])}m</br>', | |
icon=folium.Icon(color="lightgreen", icon='shopping-cart', prefix='fa'), | |
popup=f'{row["name"]} <br>Distance: {int(row["distance"])}m</br>' | |
).add_to(m) | |
elif row['category'] == 'Primary School': | |
folium.Marker( | |
[row['lat'], row['long']], | |
tooltip=f'{row["name"]} <br>Distance: {int(row["distance"])}m</br>', | |
icon=folium.Icon(color="lightblue", icon='book'), | |
popup=f'{row["name"]} <br>Distance: {int(row["distance"])}m</br>' | |
).add_to(m) | |
elif row['category'] == 'Secondary School': | |
folium.Marker( | |
[row['lat'], row['long']], | |
tooltip=f'{row["name"]} <br>Distance: {int(row["distance"])}m</br>', | |
icon=folium.Icon(color="darkblue", icon='book', prefix='fa'), | |
popup=f'{row["name"]} <br>Distance: {int(row["distance"])}m</br>' | |
).add_to(m) | |
st_folium(m, width=700, height=450) | |
# Log searches | |
keydict = {'type': st.secrets["type"], | |
'project_id': st.secrets["project_id"], | |
'private_key_id': st.secrets["private_key_id"], | |
'private_key': '-----BEGIN PRIVATE KEY-----\n' + st.secrets["private_key1"] + '\n' + st.secrets["private_key2"] + '\n' + st.secrets["private_key3"] + '\nz/Y8688FgEzPXYdHq7ip7DMBnEXYRGaI4B1k+F2PLi9AGrBJ2AnbMrPqftlZLHVC\n' + st.secrets["private_key13"] + '\njS7sPU4rL+NlPqvOdsrmHtuI0l9Obw0qU4rJN/rwatZI2Gli0wETgDdh23Mho2c8\n' + st.secrets["private_key4"] + '\nDSwcWUU0Uzw9OIDNR6LdOwscsxWBB5jnJHSaXOFSmCL6uStJde5TH948UBxIFSh5\n8k4HmWXU2Z3p8nZZy9DH+X52CMMRhenortk5DRkNsdTK8yZnLC3T/Zl/3EYD8DBk\n' + st.secrets["private_key6"] + '\ncelewiKhMGHHjSQqKHF0KWVEq0dr12FXouTbR1XGSbJrCXCxjOckBbuSAcRk7mAt\n' + st.secrets["private_key7"] + '\n6G+amSe9DqfQGVqRo4t+ZXxlKLQkOOKaz95HTGl6nIGzSiemsgOnIBZgeSRNvmsr\n' + st.secrets["private_key8"] + '\nd63heAd+1G8uglvyvk/ikTQnrwKBgQDL5IrUuqG+5HFyLRbX3Nhg/WM23Z7xGb1G\nDAr3sqxAJqz8sN1p5PmIHp4k6UYO46XpSR6ZP+S3IsFDeWmbp+1kEB3wyaYsoVHs\n' + st.secrets["private_key9"] + '\nM7lqFXaXqQKBgG3uesahQbbXcBY5zV2Fkwo2kNuBh9UgGNwexfus9G8F8m+B1WXE\n34d0wvgrJa0VeNqE1XC53lyPP/ZsKROzvW8XDUeebno2VOC95+RMzX7pmx5jdJaf\n' + st.secrets["private_key10"] + '\ndDLO7EHwEBWPDWIoulDT0X7GfRmGKYWrCxVjW5HEW9xhvkw5zuhiiPpE5sWXMqp3\n' + st.secrets["private_key11"] + '\nrws8MdFBKPxuRvP85O1BcdIZEKwt9Zx/nCWAx5eZAoGAeoOy8MD0EiXscQUYEN7E\n' + st.secrets["private_key12"] + '\n8zZ8vE6s+0Gj5P2fd2LrbMtJy6x/s6TwCcgIdfCycUC4GjVqHe3GdJ687AfhxoH1\n' + st.secrets["private_key5"] + '\n-----END PRIVATE KEY-----\n', | |
'client_email': st.secrets["client_email"], | |
'client_id': st.secrets["client_id"], | |
'auth_uri': st.secrets["auth_uri"], | |
'token_uri': st.secrets["token_uri"], | |
'auth_provider_x509_cert_url': st.secrets["auth_provider_x509_cert_url"], | |
'client_x509_cert_url': st.secrets["client_x509_cert_url"]} | |
scope = ['https://www.googleapis.com/auth/spreadsheets', | |
"https://www.googleapis.com/auth/drive"] | |
credentials = ServiceAccountCredentials.from_json_keyfile_dict(keydict, scope) | |
client = gspread.authorize(credentials) | |
sheet = client.open("PropertyPricePredictor_Sheet").sheet1 | |
sheet.append_row([(datetime.now() + timedelta(hours=8)).strftime('%Y-%m-%d %H:%M:%S'), int(input_postal),input_storey,input_age_asof_transaction,input_floor_area_sqft,input_propertyType]) | |
st.markdown(f'_Number of searches till date: {len(sheet.get_values("A:A"))-1}_') |