"""Streamlit app: predict a house price with LightGBM and explain it with SHAP.

The sidebar collects twenty numeric house features, a LightGBM regressor
trained on the ``ttd22/house-price`` dataset predicts the sale price, and SHAP
summary / interaction plots are rendered for both the entered house and the
training data.
"""

import streamlit as st
from streamlit_shap import st_shap
import shap
from datasets import load_dataset
from sklearn.model_selection import train_test_split
import lightgbm as lgb
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# ``st.experimental_memo`` was renamed to ``st.cache_data``.  Prefer the new
# name when the installed Streamlit provides it and fall back to the original
# decorator otherwise, so the script runs on both old and new releases.
_memoize = getattr(st, "cache_data", None) or st.experimental_memo


@_memoize
def load_data(selected_columns):
    """Load the house-price training split and separate features from target.

    Args:
        selected_columns: Column names to keep.  Must include ``'SalePrice'``,
            which becomes the target.

    Returns:
        A ``(X, y)`` pair: ``X`` holds every selected column except
        ``'SalePrice'`` and ``y`` is the ``'SalePrice'`` column.
    """
    dataset = load_dataset("ttd22/house-price", streaming=True)
    df = pd.DataFrame.from_dict(dataset["train"])
    df = df.drop('Id', axis=1)
    df = df[selected_columns]
    X, y = df.drop("SalePrice", axis=1), df["SalePrice"]
    return X, y


@_memoize
def load_model(X, y):
    """Fit the LightGBM regressor on an 80/20 split of ``X`` / ``y``.

    The held-out 20% is passed to ``fit`` as the evaluation set only; the
    returned model is trained on the remaining 80%.

    Args:
        X: Feature frame.
        y: Target values aligned with ``X``.

    Returns:
        The fitted ``lgb.LGBMRegressor``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    params = {
        'n_estimators': 378,
        'num_leaves': 31,
        'max_depth': 35,
        'learning_rate': 0.01713200441531346,
        'colsample_bytree': 0.5167187468778591,
        'subsample': 0.7380799250244564,
        'lambda_l1': 4.712815578969268,
        'lambda_l2': 1.7427073600749474,
        'min_child_weight': 9.962946763677316,
    }
    model = lgb.LGBMRegressor(**params)
    # NOTE(review): newer LightGBM releases dropped the ``verbose`` argument of
    # ``fit`` in favour of the ``log_evaluation`` callback -- confirm against
    # the version this app is pinned to before upgrading.
    model.fit(X_train, y_train, eval_set=[(X_test, y_test)], verbose=False)
    return model


selected_columns = [
    'OverallQual', 'GrLivArea', 'TotalBsmtSF', 'YearBuilt', 'BsmtFinSF1',
    'GarageCars', 'OverallCond', 'LotArea', 'YearRemodAdd', '1stFlrSF',
    'Fireplaces', 'GarageArea', 'OpenPorchSF', '2ndFlrSF', 'TotRmsAbvGrd',
    'FullBath', 'HalfBath', 'MSSubClass', 'BedroomAbvGr', 'KitchenAbvGr',
    'SalePrice',
]

# train LightGBM model
X, y = load_data(selected_columns)
model = load_model(X, y)

# One row per sidebar slider: (feature column, slider label, min, max).
# The row order is the column order of the frame handed to the model.
_SLIDER_SPECS = (
    ('OverallQual', 'What is Overall material and finish quality?', 1, 10),
    ('GrLivArea', 'What is Above grade (ground) living area square feet?', 334, 5642),
    ('TotalBsmtSF', 'What is Total square feet of basement area?', 0, 6110),
    ('YearBuilt', 'What is Original construction date?', 1872, 2010),
    ('BsmtFinSF1', 'What is Type 1 finished square feet?', 0, 5644),
    ('GarageCars', 'What is Size of garage in car capacity?', 0, 4),
    ('OverallCond', 'What is Overall condition rating?', 1, 9),
    ('LotArea', 'What is Lot size in square feet?', 1300, 215245),
    ('YearRemodAdd', 'What is Remodel date?', 1950, 2010),
    ('1stFlrSF', 'What is First Floor square feet?', 334, 4692),
    ('Fireplaces', 'What is Number of fireplaces?', 0, 3),
    ('GarageArea', 'What is Size of garage in square feet?', 0, 1418),
    ('OpenPorchSF', 'What is Open porch area in square feet?', 0, 547),
    ('2ndFlrSF', 'What is Second floor square feet?', 0, 2062),
    ('TotRmsAbvGrd', 'What is Total rooms above grade (does not include bathrooms)?', 2, 14),
    ('FullBath', 'What is Full bathrooms above grade?', 0, 3),
    ('HalfBath', 'What is Half baths above grade?', 0, 2),
    ('MSSubClass', 'What is The building class?', 20, 190),
    ('BedroomAbvGr', 'What is Number of bedrooms above basement level?', 0, 8),
    ('KitchenAbvGr', 'What is Number of kitchens?', 0, 3),
)


def buil_UI(X):
    """Render one sidebar slider per feature and return the chosen values.

    Args:
        X: Training feature frame.  Currently unused; the slider bounds are
            the fixed values in ``_SLIDER_SPECS``.

    Returns:
        A single-row ``pd.DataFrame`` whose columns are the feature names in
        ``_SLIDER_SPECS`` order and whose values are the slider selections.
    """
    feature_titles = []
    feature_values = []
    for column, label, lowest, highest in _SLIDER_SPECS:
        feature_titles.append(column)
        feature_values.append(
            st.sidebar.slider(label, min_value=lowest, max_value=highest)
        )
    df_new = pd.DataFrame([feature_values], columns=feature_titles)
    return df_new


def _render_summary_plot(shap_values, features):
    """Draw a SHAP summary plot and hand the resulting figure to Streamlit."""
    # ``summary_plot`` returns None; with ``show=False`` the drawing stays on
    # the current matplotlib figure so it can be passed to ``st.pyplot``
    # explicitly instead of relying on the implicit global figure.
    shap.summary_plot(shap_values, features, show=False)
    # Clear after rendering so the next plot does not draw over this one.
    st.pyplot(plt.gcf(), clear_figure=True)


def _render_shap_plots(explainer, features):
    """Render the SHAP summary plot, then the interaction summary plot."""
    _render_summary_plot(explainer.shap_values(features), features)
    _render_summary_plot(explainer.shap_interaction_values(features), features)


input_val = buil_UI(X)

st.markdown(""" """, unsafe_allow_html=True)
st.markdown(
    '''

House price prediction - Apply LightGBM Hyperparameter Tuning with Optuna

''',
    unsafe_allow_html=True,
)

if st.button('Calculate House Price'):
    predictLGBM = model.predict(input_val)
    st.write('Predicted house price is: $', predictLGBM[0])
    st.write('Please wait for 4 figures loading')

    # Build the explainer once and reuse it for all four plots.
    explainer = shap.TreeExplainer(model)

    st.write('SHAP Summary Plot and Interaction Summary Plot for this predicted house')
    _render_shap_plots(explainer, input_val)

    st.write('SHAP Summary Plot and Interaction Summary Plot for train dataset')
    _render_shap_plots(explainer, X)