# app.py -- last commit "Update app.py" (4b9b283) by chaphoto; 2.58 kB.
import pandas as pd
import streamlit as st
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import seaborn as sns
# Load the training data. The file must contain a 'SalePrice' column (the
# regression target); every other column is used as a model feature.
df = pd.read_csv('train5.csv')

# HEADINGS
st.title('House Price Prediction')
st.sidebar.header('Housing Data')
st.subheader('Training Data Stats')
st.write(df.describe())

# X AND Y DATA
# Select the target by name rather than by position so that x and y stay
# consistent regardless of where 'SalePrice' sits in the CSV.
x = df.drop(columns=['SalePrice'])
y = df['SalePrice']
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0
)
# FUNCTION
def user_report():
    """Collect the house features from the sidebar sliders.

    One slider is rendered in the Streamlit sidebar per feature, in the
    order listed below.

    Returns:
        A single-row ``pandas.DataFrame`` (index ``0``) with one column per
        slider, in declaration order, holding the currently selected values.
    """
    # (feature name, minimum, initial value, maximum)
    slider_specs = (
        ('MSSubClass', 0, 60, 200),
        ('LotArea', 1300, 10000, 22000),
        ('OverallQual', 1, 5, 10),
        ('OverallCond', 1, 5, 9),
        ('YearBuilt', 1872, 1975, 2010),
        ('YearRemodAdd', 1950, 1975, 2010),
        ('BsmtFinSF1', 0, 2500, 5000),
        ('BsmtUnfSF', 0, 2500, 5000),
        ('BsmtFinSF2', 0, 2500, 5000),
        ('TotalBsmtSF', 0, 2500, 6000),
    )

    # The positional order of st.sidebar.slider is
    # (label, min_value, max_value, value); pass the bounds and the initial
    # value by keyword so the initial value cannot end up in the max slot.
    user_report_data = {
        name: st.sidebar.slider(
            name, min_value=low, max_value=high, value=initial
        )
        for name, low, initial, high in slider_specs
    }

    report_data = pd.DataFrame(user_report_data, index=[0])
    return report_data
# Housing Data
user_data = user_report()
st.subheader('Housing Data')
st.write(user_data)

# MODEL
lr = LinearRegression()
lr.fit(x_train, y_train)
# Reorder the user's row to the training column order before predicting:
# the model matches inputs to coefficients by column position, so a
# different order would either be rejected or yield a wrong price.
user_result = lr.predict(user_data[x_train.columns])

# VISUALISATIONS
st.title('Visualised Housing Data')

# OUTPUT
st.subheader('Price of House is : ')
# predict() returns a one-element array for the single input row; show the
# scalar rather than the array's string form.
st.write(f'{user_result[0]:,.2f}')
st.title('output')
st.subheader('r2_score: ')
# R^2 on the held-out 20% split, scaled by 100 for display.
st.write(f'{r2_score(y_test, lr.predict(x_test)) * 100}%')