ttd22's picture
Update app.py
5611617
raw
history blame contribute delete
No virus
5.89 kB
import streamlit as st
from streamlit_shap import st_shap
import shap
from datasets import load_dataset
from sklearn.model_selection import train_test_split
import lightgbm as lgb
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
@st.experimental_memo
def load_data(selected_columns):
    """Fetch the house-price training split and split it into predictors and target.

    The "train" split of the ``ttd22/house-price`` dataset is requested in
    streaming mode and materialised into a pandas DataFrame. The ``Id``
    column is dropped, the frame is narrowed to ``selected_columns``, and the
    ``SalePrice`` column is separated out as the target.

    Args:
        selected_columns: Column labels to keep; must include ``"SalePrice"``
            because that column is extracted as the target.

    Returns:
        A ``(X, y)`` tuple: the DataFrame of remaining columns and the
        ``SalePrice`` Series.
    """
    splits = load_dataset("ttd22/house-price", streaming = True)
    frame = pd.DataFrame.from_dict(splits["train"])
    # 'Id' is removed first, then only the requested columns are kept.
    frame = frame.drop(columns='Id')[selected_columns]
    predictors = frame.drop(columns="SalePrice")
    target = frame["SalePrice"]
    return predictors, target
@st.experimental_memo
def load_model(X, y):
    """Train a LightGBM regressor on an 80/20 split of the given data.

    Args:
        X: Feature matrix (DataFrame of predictors).
        y: Target values aligned with ``X``.

    Returns:
        The fitted ``lgb.LGBMRegressor``. The held-out 20% is used only as the
        evaluation set during fitting; it is not returned.
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    # NOTE(review): presumably the Optuna-tuned values the page title refers to — confirm.
    params = {'n_estimators': 378, 'num_leaves': 31, 'max_depth': 35, 'learning_rate': 0.01713200441531346, 'colsample_bytree': 0.5167187468778591, 'subsample': 0.7380799250244564, 'lambda_l1': 4.712815578969268, 'lambda_l2': 1.7427073600749474, 'min_child_weight': 9.962946763677316}
    model = lgb.LGBMRegressor(**params)

    fit_kwargs = {'eval_set': [(X_test, y_test)]}
    if hasattr(lgb, 'log_evaluation'):
        # The `verbose` argument of fit() was removed in LightGBM 4.0;
        # a log_evaluation callback with period=0 is the supported way to
        # keep per-iteration evaluation output silent.
        fit_kwargs['callbacks'] = [lgb.log_evaluation(period=0)]
    else:
        # Older LightGBM releases without the callback still take `verbose`.
        fit_kwargs['verbose'] = False
    model.fit(X_train, y_train, **fit_kwargs)
    return model
# Columns kept from the raw dataset: twenty predictors followed by the target.
selected_columns = [
    'OverallQual',
    'GrLivArea',
    'TotalBsmtSF',
    'YearBuilt',
    'BsmtFinSF1',
    'GarageCars',
    'OverallCond',
    'LotArea',
    'YearRemodAdd',
    '1stFlrSF',
    'Fireplaces',
    'GarageArea',
    'OpenPorchSF',
    '2ndFlrSF',
    'TotRmsAbvGrd',
    'FullBath',
    'HalfBath',
    'MSSubClass',
    'BedroomAbvGr',
    'KitchenAbvGr',
    'SalePrice',
]

# Load the data, then train the LightGBM model on it.
X, y = load_data(selected_columns)
model = load_model(X, y)
def buil_UI(X):
    """Render one sidebar slider per model feature and return the chosen values.

    Each slider is created with only a label, ``min_value`` and ``max_value``.
    The slider bounds are the hard-coded values in the table below; they are
    not derived from ``X``.

    Args:
        X: Accepted for interface compatibility; not read by this function.

    Returns:
        A single-row DataFrame whose columns are the feature names, in the
        order listed in the table, holding the current slider values.
    """
    # One row per feature: (column name, slider label, min_value, max_value).
    # Keeping name and slider together means the column order of the returned
    # frame can never drift from the order in which the values are collected.
    slider_specs = [
        ('OverallQual', 'What is Overall material and finish quality?', 1, 10),
        ('GrLivArea', 'What is Above grade (ground) living area square feet?', 334, 5642),
        ('TotalBsmtSF', 'What is Total square feet of basement area?', 0, 6110),
        ('YearBuilt', 'What is Original construction date?', 1872, 2010),
        ('BsmtFinSF1', 'What is Type 1 finished square feet?', 0, 5644),
        ('GarageCars', 'What is Size of garage in car capacity?', 0, 4),
        ('OverallCond', 'What is Overall condition rating?', 1, 9),
        ('LotArea', 'What is Lot size in square feet?', 1300, 215245),
        ('YearRemodAdd', 'What is Remodel date?', 1950, 2010),
        ('1stFlrSF', 'What is First Floor square feet?', 334, 4692),
        ('Fireplaces', 'What is Number of fireplaces?', 0, 3),
        ('GarageArea', 'What is Size of garage in square feet?', 0, 1418),
        ('OpenPorchSF', 'What is Open porch area in square feet?', 0, 547),
        ('2ndFlrSF', 'What is Second floor square feet?', 0, 2062),
        ('TotRmsAbvGrd', 'What is Total rooms above grade (does not include bathrooms)?', 2, 14),
        ('FullBath', 'What is Full bathrooms above grade?', 0, 3),
        ('HalfBath', 'What is Half baths above grade?', 0, 2),
        ('MSSubClass', 'What is The building class?', 20, 190),
        ('BedroomAbvGr', 'What is Number of bedrooms above basement level?', 0, 8),
        ('KitchenAbvGr', 'What is Number of kitchens?', 0, 3),
    ]

    feature_titles = [column for column, _, _, _ in slider_specs]
    feature_values = [
        st.sidebar.slider(label, min_value=low, max_value=high)
        for _, label, low, high in slider_specs
    ]
    return pd.DataFrame([feature_values], columns=feature_titles)
def _show_shap_summary(shap_values, features):
    """Draw a SHAP summary plot and render its figure in the Streamlit page.

    ``shap.summary_plot`` returns nothing, so the figure is drawn with
    ``show=False`` and then fetched explicitly via ``plt.gcf()``. Passing the
    figure object to ``st.pyplot`` avoids the deprecated global-figure path.
    The figure is closed afterwards so figures do not pile up across reruns.
    """
    shap.summary_plot(shap_values, features, show=False)
    figure = plt.gcf()
    st.pyplot(figure)
    plt.close(figure)


# Sidebar inputs describing the house to be priced.
input_val = buil_UI(X)

# Page title, styled through an inline CSS class.
st.markdown(""" <style> .font {font-size:50px ; font-family: 'Cooper Black'; color: #FF9633;} </style> """, unsafe_allow_html=True)
st.markdown('<p class="font">House price prediction - Apply LightGBM Hyperparameter Tuning with Optuna</p>', unsafe_allow_html=True)

if st.button('Calculate House Price'):
    predictLGBM = model.predict(input_val)
    st.write('Predicted house price is: $',predictLGBM[0])
    st.write('Please wait for 4 figures loading')

    # One explainer serves all four plots; it depends only on the model.
    explainer = shap.TreeExplainer(model)

    st.write('SHAP Summary Plot and Interaction Summary Plot for this predicted house')
    _show_shap_summary(explainer.shap_values(input_val), input_val)
    _show_shap_summary(explainer.shap_interaction_values(input_val), input_val)

    st.write('SHAP Summary Plot and Interaction Summary Plot for train dataset')
    _show_shap_summary(explainer.shap_values(X), X)
    _show_shap_summary(explainer.shap_interaction_values(X), X)