File size: 5,885 Bytes
57bb50d
 
 
 
 
 
 
 
c9fee1a
57bb50d
f997324
c0750c9
57bb50d
 
 
b8e6bb2
57bb50d
b8e6bb2
57bb50d
f997324
fad2d01
57bb50d
d8774fc
57bb50d
b8e6bb2
57bb50d
 
11bc620
 
 
 
57bb50d
11bc620
b8e6bb2
bd32737
053e378
c0750c9
d8774fc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c88b28f
 
 
 
1d339ec
 
 
 
 
 
c53a23a
 
 
c9fee1a
b77f4eb
c53a23a
7623b8e
a270b83
c53a23a
5611617
b77f4eb
c9fee1a
 
 
 
f64ca0e
c9fee1a
a270b83
 
c53a23a
a270b83
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import streamlit as st
from streamlit_shap import st_shap
import shap
from datasets import load_dataset
from sklearn.model_selection import train_test_split
import lightgbm as lgb
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

@st.experimental_memo
def load_data(selected_columns):
    """Fetch the house-price data and split it into predictors and target.

    The "train" split of the ``ttd22/house-price`` dataset is opened in
    streaming mode and materialised into a pandas DataFrame.  The ``Id``
    column is dropped, then the frame is narrowed to ``selected_columns``.

    Args:
        selected_columns: Column labels to keep.  ``"SalePrice"`` must be
            among them because it is split off as the target.

    Returns:
        A ``(X, y)`` tuple: ``X`` is the DataFrame of the kept columns
        without ``SalePrice``; ``y`` is the ``SalePrice`` Series.
    """
    splits = load_dataset("ttd22/house-price", streaming = True)
    frame = pd.DataFrame.from_dict(splits["train"]).drop(columns='Id')[selected_columns]
    target = frame["SalePrice"]
    predictors = frame.drop(columns="SalePrice")
    return predictors, target

@st.experimental_memo
def load_model(X, y):
    """Train the LightGBM regressor used by the app.

    The data is split 80/20 with a fixed seed; the 20% hold-out is passed to
    ``fit`` as the evaluation set.  The model is trained with a fixed set of
    hyperparameters (presumably the result of the Optuna search mentioned in
    the page title -- not verifiable from this file).

    Args:
        X: Feature DataFrame.
        y: Target values aligned with ``X``.

    Returns:
        The fitted ``lgb.LGBMRegressor``.
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    params = {'n_estimators': 378, 'num_leaves': 31, 'max_depth': 35, 'learning_rate': 0.01713200441531346, 'colsample_bytree': 0.5167187468778591, 'subsample': 0.7380799250244564, 'lambda_l1': 4.712815578969268, 'lambda_l2': 1.7427073600749474, 'min_child_weight': 9.962946763677316}
    model = lgb.LGBMRegressor(**params)
    # `fit(verbose=...)` was deprecated in LightGBM 3.3 and removed in 4.0
    # (it raises TypeError there).  A log_evaluation callback with period=0
    # suppresses the per-iteration evaluation output on both 3.3.x and 4.x.
    model.fit(
        X_train,
        y_train,
        eval_set=[(X_test, y_test)],
        callbacks=[lgb.log_evaluation(period=0)],
    )
    return model

# Columns kept from the dataset: twenty predictors followed by the target.
selected_columns = [
    'OverallQual',
    'GrLivArea',
    'TotalBsmtSF',
    'YearBuilt',
    'BsmtFinSF1',
    'GarageCars',
    'OverallCond',
    'LotArea',
    'YearRemodAdd',
    '1stFlrSF',
    'Fireplaces',
    'GarageArea',
    'OpenPorchSF',
    '2ndFlrSF',
    'TotRmsAbvGrd',
    'FullBath',
    'HalfBath',
    'MSSubClass',
    'BedroomAbvGr',
    'KitchenAbvGr',
    'SalePrice',  # regression target, split off inside load_data
]

# Load the features/target and train the LightGBM model (both calls are memoised).
X, y = load_data(selected_columns)
model = load_model(X, y)


def buil_UI(X):
    """Render one sidebar slider per model feature and collect the answers.

    Sliders are created in the order of the table below, each with
    hard-coded integer bounds.

    Args:
        X: Training features.  Not used by this function; kept so existing
            callers keep working.

    Returns:
        A one-row DataFrame whose columns are the feature names (in table
        order) and whose values are the current slider positions.
    """
    # (feature column, sidebar question, lower bound, upper bound)
    slider_specs = (
        ('OverallQual', 'What is Overall material and finish quality?', 1, 10),
        ('GrLivArea', 'What is Above grade (ground) living area square feet?', 334, 5642),
        ('TotalBsmtSF', 'What is Total square feet of basement area?', 0, 6110),
        ('YearBuilt', 'What is Original construction date?', 1872, 2010),
        ('BsmtFinSF1', 'What is Type 1 finished square feet?', 0, 5644),
        ('GarageCars', 'What is Size of garage in car capacity?', 0, 4),
        ('OverallCond', 'What is Overall condition rating?', 1, 9),
        ('LotArea', 'What is Lot size in square feet?', 1300, 215245),
        ('YearRemodAdd', 'What is Remodel date?', 1950, 2010),
        ('1stFlrSF', 'What is First Floor square feet?', 334, 4692),
        ('Fireplaces', 'What is Number of fireplaces?', 0, 3),
        ('GarageArea', 'What is Size of garage in square feet?', 0, 1418),
        ('OpenPorchSF', 'What is Open porch area in square feet?', 0, 547),
        ('2ndFlrSF', 'What is Second floor square feet?', 0, 2062),
        ('TotRmsAbvGrd', 'What is Total rooms above grade (does not include bathrooms)?', 2, 14),
        ('FullBath', 'What is Full bathrooms above grade?', 0, 3),
        ('HalfBath', 'What is Half baths above grade?', 0, 2),
        ('MSSubClass', 'What is The building class?', 20, 190),
        ('BedroomAbvGr', 'What is Number of bedrooms above basement level?', 0, 8),
        ('KitchenAbvGr', 'What is Number of kitchens?', 0, 3),
    )

    column_names = [column for column, _, _, _ in slider_specs]
    # The comprehension runs in table order, so the sliders appear in the
    # sidebar in that same order.
    answers = [
        st.sidebar.slider(question, min_value=lower, max_value=upper)
        for _, question, lower, upper in slider_specs
    ]
    return pd.DataFrame([answers], columns=column_names)

def _render_shap_summary(shap_output, features):
    """Draw a SHAP summary plot and display it on the Streamlit page.

    ``shap.summary_plot`` is called with ``show=False`` so the figure it drew
    can be picked up via ``plt.gcf()`` and passed to ``st.pyplot`` explicitly.
    This avoids Streamlit's deprecated "global pyplot" fallback.  The figure
    is closed afterwards so the next plot starts from a clean canvas.

    Args:
        shap_output: SHAP values or SHAP interaction values for ``features``.
        features: The DataFrame the SHAP output was computed for.
    """
    shap.summary_plot(shap_output, features, show=False)
    figure = plt.gcf()
    st.pyplot(figure)
    plt.close(figure)


# Sidebar inputs -> one-row DataFrame in the model's feature order.
input_val = buil_UI(X)

# Page title, styled through an injected CSS class.
st.markdown(""" <style> .font {font-size:50px ; font-family: 'Cooper Black'; color: #FF9633;} </style> """, unsafe_allow_html=True)
st.markdown('<p class="font">House price prediction - Apply LightGBM Hyperparameter Tuning with Optuna</p>', unsafe_allow_html=True)

if st.button('Calculate House Price'):
    predictLGBM = model.predict(input_val)
    st.write('Predicted house price is: $',predictLGBM[0])
    st.write('Please wait for 4 figures loading')

    # One explainer serves all four plots; it was previously rebuilt per plot.
    explainer = shap.TreeExplainer(model)

    # Plots for the single house described by the sidebar sliders.
    st.write('SHAP Summary Plot and Interaction Summary Plot for this predicted house')
    _render_shap_summary(explainer.shap_values(input_val), input_val)
    _render_shap_summary(explainer.shap_interaction_values(input_val), input_val)

    # The same two plots computed over the full feature table X.
    st.write('SHAP Summary Plot and Interaction Summary Plot for train dataset')
    _render_shap_summary(explainer.shap_values(X), X)
    _render_shap_summary(explainer.shap_interaction_values(X), X)