Milestone2P1 / eda.py
casheu's picture
commit 24/11/22
3d78478
raw
history blame
2.43 kB
import streamlit as st
from PIL import Image
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# Page-wide Streamlit settings: browser tab title, wide layout, and a sidebar
# that starts expanded. This runs at import time, before run() issues any
# other st.* call.
# NOTE(review): Streamlit documents set_page_config as something that must be
# called before other Streamlit commands on a page -- keep it ahead of run()
# and confirm no importing module calls it a second time.
st.set_page_config(
page_title='Model EDA',
layout='wide',
initial_sidebar_state='expanded'
)
def run():
    """Render the House Price Prediction EDA page.

    Reads ``image.jpeg`` and ``USA_Housing.csv`` from the current working
    directory, shows the raw table, derives a categorical ``Pricerange``
    column from ``Price`` and a ``Roomscat`` column from ``Rooms``, and draws
    per-category mean bar charts plus two scatter plots of ``Income`` against
    ``Rooms`` and ``Population``.

    Returns:
        None. All output is written to the Streamlit page.
    """

    def barc(df, x, y):
        """Draw a bar chart of the mean of column ``y`` per group of ``x``.

        Bars are ordered by ascending mean, not by category label.
        """
        # Aggregate only ``y``: averaging the whole frame would try to take
        # the mean of string columns such as ``Pricerange`` and fail on
        # recent pandas versions.
        means = df.groupby(x)[y].mean().reset_index().sort_values(y)
        positions = list(range(len(means)))

        fig, ax = plt.subplots(figsize=(15, 5))
        ax.bar(positions, means[y])
        ax.set_xticks(positions)
        ax.set_xticklabels(means[x], rotation=45)
        ax.set_ylabel('Mean')
        st.pyplot(fig)
        # Streamlit reruns the script on every interaction; close the figure
        # so pyplot's global registry does not accumulate open figures.
        plt.close(fig)

    # Title
    st.title('House Price Prediction - EDA')
    image = Image.open('image.jpeg')
    st.image(image, caption='US Suburbs')
    st.markdown('---')

    df = pd.read_csv('USA_Housing.csv')
    st.dataframe(df)
    st.markdown('---')

    st.subheader('Area Income')
    eda = df.copy()

    # Price bands, applied from the highest band down. ``between`` is
    # inclusive on both ends, so a price sitting exactly on a shared boundary
    # ends up with the label of the LOWER band because that assignment runs
    # later. Anything not matched (prices above 1.75e6, or missing prices)
    # keeps the 'Very High' default.
    price_bands = (
        (1.5e+06, 1.75e+06, 'High'),
        (1.25e+06, 1.5e+06, 'Above Average'),
        (1e+06, 1.25e+06, 'Below Average'),
        (0.75e+06, 1e+06, 'Low'),
    )
    eda['Pricerange'] = 'Very High'
    for lower, upper, label in price_bands:
        eda.loc[eda['Price'].between(lower, upper), 'Pricerange'] = label
    eda.loc[eda['Price'] < 0.75e+06, 'Pricerange'] = 'Very Low'
    st.dataframe(eda)

    barc(eda, 'Pricerange', 'Income')
    st.markdown('---')

    st.subheader('House Age')
    barc(eda, 'Pricerange', 'Age')
    st.markdown('---')

    st.subheader('Number of Rooms')
    barc(eda, 'Pricerange', 'Rooms')
    st.markdown('---')

    st.subheader('Area Population')
    barc(eda, 'Pricerange', 'Population')
    st.markdown('---')

    st.subheader('Connection Between Features')
    for feature in ('Rooms', 'Population'):
        # relplot is a figure-level function: it builds its own figure and
        # returns a FacetGrid, so that grid's figure is what must be handed
        # to Streamlit. height=5 with aspect=3 gives a 15x5 inch plot area.
        grid = sns.relplot(
            data=eda,
            x="Income",
            y=feature,
            hue='Pricerange',
            height=5,
            aspect=3,
        )
        st.pyplot(grid.figure)
        plt.close(grid.figure)

    # Room-count categories, applied from 8 down to 3 with the same inclusive
    # ``between`` semantics as the price bands (an exact integer room count
    # lands in the lower category). Unmatched rows keep the default of 9;
    # rows with fewer than 3 rooms become 2.
    eda['Roomscat'] = 9
    for floor in range(8, 2, -1):
        eda.loc[eda['Rooms'].between(floor, floor + 1), 'Roomscat'] = floor
    eda.loc[eda['Rooms'] < 3, 'Roomscat'] = 2

    barc(eda, 'Roomscat', 'Age')
# Render the page only when this module is executed directly as a script,
# not when it is imported by another module.
if __name__ == '__main__':
    run()