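# NOTE(review): the next three lines look like commit metadata pasted from the
# hosting page; kept as comments so the file parses as Python.
# chaphoto's picture
# Update app.py
# 4b9b283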
import pandas as pd
import streamlit as st
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import seaborn as sns
# Load the training data. The path is relative to the current working
# directory, and the file must contain a 'SalePrice' column (the target).
df = pd.read_csv('train5.csv')

# HEADINGS
st.title('House Price Prediction')
st.sidebar.header('Housing Data')
st.subheader('Training Data Stats')
st.write(df.describe())

# X AND Y DATA
# Select the target by name rather than by position, so the feature/target
# split stays consistent even when 'SalePrice' is not the last column.
x = df.drop(columns=['SalePrice'])
y = df['SalePrice']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between reruns of the script.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0
)
# FUNCTION
def user_report():
    """Collect one house's feature values from the sidebar sliders.

    Returns:
        A single-row ``pandas.DataFrame`` (index ``0``) with one column per
        slider, in the order the features are listed in ``slider_specs``.
    """
    # (lowest, initial, highest) value for each feature slider.
    slider_specs = {
        'MSSubClass': (0, 60, 200),
        'LotArea': (1300, 10000, 22000),
        'OverallQual': (1, 5, 10),
        'OverallCond': (1, 5, 9),
        'YearBuilt': (1872, 1975, 2010),
        'YearRemodAdd': (1950, 1975, 2010),
        'BsmtFinSF1': (0, 2500, 5000),
        'BsmtUnfSF': (0, 2500, 5000),
        'BsmtFinSF2': (0, 2500, 5000),
        'TotalBsmtSF': (0, 2500, 6000),
    }

    # Pass the bounds by keyword: the slider's positional order is
    # (label, min_value, max_value, value), so writing "min, initial, max"
    # positionally hands the intended maximum to the ``value`` slot.
    user_report_data = {
        feature: st.sidebar.slider(
            feature, min_value=lowest, max_value=highest, value=initial
        )
        for feature, (lowest, initial, highest) in slider_specs.items()
    }

    # index=[0] is required because every column holds a scalar.
    return pd.DataFrame(user_report_data, index=[0])
# Housing Data
# Read the slider values and echo them back to the user.
user_data = user_report()
st.subheader('Housing Data')
st.write(user_data)

# MODEL
lr = LinearRegression()
lr.fit(x_train, y_train)
# Hand the user's features to the model in the same column order it was
# fitted on; scikit-learn checks DataFrame feature names and their order at
# predict time, and a missing feature surfaces here as a KeyError.
user_result = lr.predict(user_data[x_train.columns])

# VISUALISATIONS
st.title('Visualised Housing Data')

# COLOR FUNCTION
# NOTE(review): `color` is not read anywhere in the visible part of the file;
# kept because it is a module-level name.
color = 'blue' if user_result[0] == 0 else 'red'

# OUTPUT
st.subheader('Price of House is : ')
st.write(str(user_result))
st.title('output')
st.subheader('r2_score: ')
# R^2 on the held-out split, shown multiplied by 100 with a '%' suffix.
test_r2 = r2_score(y_test, lr.predict(x_test))
st.write(str(test_r2 * 100) + '%')