# Milestone2P1 / eda.py
# casheu's picture
# commit
# 49e7a62
import streamlit as st
from PIL import Image
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Page-wide Streamlit settings, applied once when the module is loaded:
# page title, wide layout, and the sidebar initially expanded.
st.set_page_config(
    page_title='Model EDA',
    layout='wide',
    initial_sidebar_state='expanded',
)
# Inclusive (low, high, label) price bands, listed from highest to lowest.
# They are applied in this order, so a price sitting exactly on a shared
# boundary ends up with the label of the LOWER band (the later assignment wins).
_PRICE_BANDS = [
    (1.5e+06, 1.75e+06, 'High'),
    (1.25e+06, 1.5e+06, 'Above Average'),
    (1e+06, 1.25e+06, 'Below Average'),
    (0.75e+06, 1e+06, 'Low'),
]

# Inclusive (low, high, label) room-count bands: (8, 9, 8), (7, 8, 7), ... (3, 4, 3).
_ROOM_BANDS = [(n, n + 1, n) for n in range(8, 2, -1)]


def _render_and_close(fig):
    """Hand *fig* to Streamlit, then close it.

    Closing matters because every call to ``run`` creates fresh figures;
    without it matplotlib keeps all of them alive.
    """
    st.pyplot(fig)
    plt.close(fig)


def _bin_column(df, source, target, bands, top_label, bottom_label):
    """Add a categorical column *target* to *df* derived from numeric column *source*.

    Every row starts as *top_label*; each ``(low, high, label)`` entry of
    *bands* is then applied in order using an inclusive ``between`` check,
    and finally rows strictly below the lowest band's lower bound receive
    *bottom_label*. *bands* must be ordered from highest to lowest.
    Modifies *df* in place.
    """
    values = df[source]
    df[target] = top_label
    for low, high, label in bands:
        df.loc[values.between(low, high), target] = label
    lowest_bound = bands[-1][0]
    df.loc[values < lowest_bound, target] = bottom_label


def _scatter(df, x, y, hue=None):
    """Render a 15x10 seaborn scatter plot of *y* against *x*, optionally coloured by *hue*."""
    fig, ax = plt.subplots(figsize=(15, 10))
    sns.scatterplot(data=df, x=x, y=y, hue=hue, ax=ax)
    _render_and_close(fig)


def _mean_bar(df, x, y):
    """Render a bar chart of the mean of column *y* per group of column *x*.

    Bars are ordered by ascending mean and labelled with the group value.
    """
    # Aggregate only the plotted column: a frame-wide ``.mean()`` also tries
    # to average text columns such as 'Pricerange', which raises TypeError on
    # pandas >= 2.0.
    means = df.groupby(x)[y].mean().reset_index().sort_values(y)
    positions = list(range(len(means)))

    fig, ax = plt.subplots(figsize=(15, 5))
    ax.bar(positions, means[y])
    ax.set_xticks(positions)
    ax.set_xticklabels(means[x], rotation=45)
    ax.set_ylabel('Mean')
    _render_and_close(fig)


def run():
    """Render the house-price EDA page.

    Reads 'image.jpeg' and 'USA_Housing.csv' from the working directory,
    shortens the feature column names, and renders, in order: the banner
    image, the raw table, an Income/Price scatter plot, mean Age / Rooms /
    Population per price range, two hue-coloured scatter plots, and mean
    Age per room-count bucket.
    """
    # Title
    st.title('House Price Prediction - EDA')
    # The context manager releases the image file handle once Streamlit has
    # read the picture.
    with Image.open('image.jpeg') as image:
        st.image(image, caption='US Suburbs')
    st.markdown('---')

    st.subheader('Dataset')
    cols = {
        'Avg. Area Income': 'Income',
        'Avg. Area House Age': 'Age',
        'Avg. Area Number of Rooms': 'Rooms',
        'Avg. Area Number of Bedrooms': 'Bedrooms',
        'Area Population': 'Population',
    }
    df = pd.read_csv('USA_Housing.csv').rename(columns=cols)
    st.dataframe(df)
    st.markdown('---')

    st.subheader('Area Income')
    _scatter(df, 'Income', 'Price')
    st.markdown('---')

    st.subheader('House Age')
    # Work on a copy so the derived category columns never reach `df`.
    eda = df.copy()
    _bin_column(eda, 'Price', 'Pricerange', _PRICE_BANDS,
                top_label='Very High', bottom_label='Very Low')
    _mean_bar(eda, 'Pricerange', 'Age')
    st.markdown('---')

    st.subheader('Number of Rooms')
    _mean_bar(eda, 'Pricerange', 'Rooms')
    st.markdown('---')

    st.subheader('Area Population')
    _mean_bar(eda, 'Pricerange', 'Population')
    st.markdown('---')

    st.subheader('Connection Between Features')
    _scatter(eda, "Income", "Rooms", hue='Pricerange')
    _scatter(eda, "Income", "Population", hue='Pricerange')

    _bin_column(eda, 'Rooms', 'Roomscat', _ROOM_BANDS,
                top_label=9, bottom_label=2)
    _mean_bar(eda, 'Roomscat', 'Age')
# Render the page only when this file is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    run()