"""Streamlit app: predict a house sale price with a linear regression model.

Loads the training data, fits a ``LinearRegression`` on a fixed set of
numeric features, and predicts the price for the values picked with the
sidebar sliders.
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

TARGET_COLUMN = 'SalePrice'

# Feature name -> (minimum, default, maximum) for its sidebar slider.
# The insertion order is both the slider order in the sidebar and the column
# order used for training and prediction.
SLIDER_SPECS = {
    'MSSubClass': (0, 60, 200),
    'LotArea': (1300, 10000, 22000),
    'OverallQual': (1, 5, 10),
    'OverallCond': (1, 5, 9),
    'YearBuilt': (1872, 1975, 2010),
    'YearRemodAdd': (1950, 1975, 2010),
    'BsmtFinSF1': (0, 2500, 5000),
    'BsmtUnfSF': (0, 2500, 5000),
    'BsmtFinSF2': (0, 2500, 5000),
    'TotalBsmtSF': (0, 2500, 6000),
}
FEATURE_COLUMNS = list(SLIDER_SPECS)

# Loading the data
df = pd.read_csv('train5.csv')

# HEADINGS
st.title('House Price Prediction')
st.sidebar.header('Housing Data')
st.subheader('Training Data Stats')
st.write(df.describe())

# X AND Y DATA
# Train on exactly the columns the user can set, in the same order, so the
# frame passed to predict() carries the feature names the model was fit with.
x = df[FEATURE_COLUMNS]
# Select the target by name rather than by position in the file.
y = df[TARGET_COLUMN]
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0
)


# FUNCTION
def user_report():
    """Collect one set of feature values from the sidebar sliders.

    Returns:
        A single-row ``pandas.DataFrame`` (index ``0``) with one column per
        entry of ``SLIDER_SPECS``, in that order.
    """
    # Keyword arguments keep minimum / maximum / default unambiguous:
    # positionally, st.slider expects (min_value, max_value, value), and the
    # default must lie inside [min_value, max_value].
    selections = {
        feature: st.sidebar.slider(
            feature, min_value=low, max_value=high, value=default
        )
        for feature, (low, default, high) in SLIDER_SPECS.items()
    }
    return pd.DataFrame(selections, index=[0])


# Housing Data
user_data = user_report()
st.subheader('Housing Data')
st.write(user_data)

# MODEL
lr = LinearRegression()
lr.fit(x_train, y_train)
user_result = lr.predict(user_data)

# VISUALISATIONS
st.title('Visualised Housing Data')

# COLOR FUNCTION
# NOTE(review): `color` is not consumed by anything in this part of the file;
# kept in case a chart elsewhere relies on it.
color = 'blue' if user_result[0] == 0 else 'red'

# OUTPUT
st.subheader('Price of House is : ')
st.write(str(user_result))
st.title('output')
st.subheader('r2_score: ')
# R^2 on the held-out 20% split, shown as a percentage.
st.write(str(r2_score(y_test, lr.predict(x_test)) * 100) + '%')