Milestone2P1 / eda.py
casheu's picture
commit
49e7a62
raw
history blame contribute delete
No virus
2.81 kB
import streamlit as st
from PIL import Image
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Page-wide Streamlit settings. This call sits at module level, so it runs
# when the file is imported or executed, before run() renders any element.
st.set_page_config(
    layout="wide",
    initial_sidebar_state="expanded",
    page_title="Model EDA",
)
# (low, high, label) bands for 'Price'. Both bounds are inclusive and the
# bands are applied top-down, so a value sitting exactly on a shared edge
# ends up with the label of the lower band (the later assignment wins).
_PRICE_BANDS = (
    (1.5e+06, 1.75e+06, 'High'),
    (1.25e+06, 1.5e+06, 'Above Average'),
    (1e+06, 1.25e+06, 'Below Average'),
    (0.75e+06, 1e+06, 'Low'),
)

# Same scheme for 'Rooms': each band is labelled with its lower bound.
_ROOM_BANDS = tuple((low, low + 1, low) for low in range(8, 2, -1))


def _show_and_close(fig):
    """Render *fig* on the Streamlit page, then close it.

    pyplot keeps every figure it creates registered until it is closed
    explicitly; closing here stops figures from piling up each time the
    script is rerun.
    """
    try:
        st.pyplot(fig)
    finally:
        plt.close(fig)


def _scatter(df, x, y, hue=None):
    """Draw a 15x10 scatter plot of column *y* against *x*, optionally coloured by *hue*."""
    fig, ax = plt.subplots(figsize=(15, 10))
    sns.scatterplot(data=df, x=x, y=y, hue=hue, ax=ax)
    _show_and_close(fig)


def _mean_bar(df, x, y):
    """Draw a bar chart of the mean of *y* per *x* group, bars sorted by that mean."""
    # Aggregate only the plotted column: averaging the whole frame fails on
    # string columns (e.g. 'Pricerange') with pandas >= 2.0 and does work
    # that is never displayed.
    means = df.groupby(x)[y].mean().reset_index().sort_values(y)
    positions = list(range(len(means)))

    fig, ax = plt.subplots(figsize=(15, 5))
    ax.bar(positions, means[y])
    ax.set_xticks(positions)
    ax.set_xticklabels(means[x], rotation=45)
    ax.set_ylabel('Mean')
    _show_and_close(fig)


def _assign_bands(frame, source, target, bands, default, floor, floor_label):
    """Write a category column *target* into *frame* derived from *source*.

    Every row starts at *default*; each (low, high, label) entry of *bands*
    then overwrites rows whose *source* value lies in [low, high], in the
    order given; finally rows strictly below *floor* get *floor_label*.
    Rows matching nothing (values above the top band, or missing values)
    keep *default*.
    """
    frame[target] = default
    for low, high, label in bands:
        frame.loc[frame[source].between(low, high), target] = label
    frame.loc[frame[source] < floor, target] = floor_label


def run():
    """Render the House Price Prediction EDA page.

    Reads 'image.jpeg' and 'USA_Housing.csv' via relative paths, shortens
    the 'Avg. Area ...' / 'Area Population' column names, and renders the
    dataset followed by scatter plots and mean-per-category bar charts.
    The CSV is expected to provide the renamed columns plus 'Price'.
    """
    # Title
    st.title('House Price Prediction - EDA')
    image = Image.open('image.jpeg')
    st.image(image, caption='US Suburbs')
    st.markdown('---')

    st.subheader('Dataset')
    short_names = {
        'Avg. Area Income': 'Income',
        'Avg. Area House Age': 'Age',
        'Avg. Area Number of Rooms': 'Rooms',
        'Avg. Area Number of Bedrooms': 'Bedrooms',
        'Area Population': 'Population',
    }
    df = pd.read_csv('USA_Housing.csv').rename(columns=short_names)
    st.dataframe(df)
    st.markdown('---')

    st.subheader('Area Income')
    _scatter(df, 'Income', 'Price')
    st.markdown('---')

    # Work on a copy so the derived category columns never leak into df.
    eda = df.copy()
    _assign_bands(
        eda, 'Price', 'Pricerange', _PRICE_BANDS,
        default='Very High', floor=0.75e+06, floor_label='Very Low',
    )

    st.subheader('House Age')
    _mean_bar(eda, 'Pricerange', 'Age')
    st.markdown('---')

    st.subheader('Number of Rooms')
    _mean_bar(eda, 'Pricerange', 'Rooms')
    st.markdown('---')

    st.subheader('Area Population')
    _mean_bar(eda, 'Pricerange', 'Population')
    st.markdown('---')

    st.subheader('Connection Between Features')
    _scatter(eda, "Income", "Rooms", 'Pricerange')
    _scatter(eda, "Income", "Population", 'Pricerange')

    _assign_bands(
        eda, 'Rooms', 'Roomscat', _ROOM_BANDS,
        default=9, floor=3, floor_label=2,
    )
    _mean_bar(eda, 'Roomscat', 'Age')
# Script entry point: build the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    run()