Spaces:
Runtime error
Runtime error
import streamlit as st | |
from PIL import Image | |
import pandas as pd | |
import seaborn as sns | |
import matplotlib.pyplot as plt | |
# Page-level settings are applied at import time, before any other
# Streamlit element is drawn by run().
st.set_page_config(
    layout="wide",
    initial_sidebar_state="expanded",
    page_title="Model EDA",
)
# Short display names for the verbose column headers of USA_Housing.csv.
_SHORT_NAMES = {
    'Avg. Area Income': 'Income',
    'Avg. Area House Age': 'Age',
    'Avg. Area Number of Rooms': 'Rooms',
    'Avg. Area Number of Bedrooms': 'Bedrooms',
    'Area Population': 'Population',
}

# (low, high, label) price bands, listed from the highest band down.
# Both edges are inclusive, so a price sitting exactly on a shared edge is
# claimed by the band applied last, i.e. the lower one.
_PRICE_BANDS = (
    (1.5e+06, 1.75e+06, 'High'),
    (1.25e+06, 1.5e+06, 'Above Average'),
    (1e+06, 1.25e+06, 'Below Average'),
    (0.75e+06, 1e+06, 'Low'),
)

# (low, high, label) room-count bands: [8, 9] -> 8 down to [3, 4] -> 3.
_ROOM_BANDS = tuple((n, n + 1, n) for n in range(8, 2, -1))


def _assign_bands(frame, source, target, default, bands, floor):
    """Write a categorical column derived from numeric bands, in place.

    Args:
        frame: DataFrame to modify.
        source: Name of the numeric column to classify.
        target: Name of the column to create or overwrite.
        default: Label given to every row before any band is applied; rows
            matched by no band and not below the floor keep it.
        bands: Iterable of ``(low, high, label)`` triples applied in order
            with inclusive edges, so later bands overwrite earlier ones.
        floor: ``(cutoff, label)`` pair; rows strictly below ``cutoff``
            receive ``label`` after all bands have been applied.
    """
    frame[target] = default
    for low, high, label in bands:
        frame.loc[frame[source].between(low, high), target] = label
    cutoff, floor_label = floor
    frame.loc[frame[source] < cutoff, target] = floor_label


def _scatter(frame, x, y, hue=None):
    """Render a 15x10 seaborn scatter plot of ``y`` against ``x``.

    Args:
        frame: DataFrame holding the plotted columns.
        x: Column name for the horizontal axis.
        y: Column name for the vertical axis.
        hue: Optional column name used to colour the points.
    """
    fig, ax = plt.subplots(figsize=(15, 10))
    sns.scatterplot(data=frame, x=x, y=y, hue=hue, ax=ax)
    st.pyplot(fig)
    # Streamlit re-executes the script on every interaction; figures that
    # are not closed stay registered with pyplot and pile up in memory.
    plt.close(fig)


def _mean_bar(frame, group_col, value_col):
    """Render a bar chart of the mean of ``value_col`` per ``group_col``.

    Bars are sorted by ascending mean.

    Args:
        frame: DataFrame holding both columns.
        group_col: Column whose distinct values become the bars.
        value_col: Numeric column that is averaged within each group.
    """
    # Aggregate only the plotted column. Averaging the whole frame raises
    # TypeError on pandas >= 2.0 as soon as it contains a non-numeric
    # column (e.g. 'Pricerange' when grouping by 'Roomscat').
    means = (
        frame.groupby(group_col)[value_col]
        .mean()
        .reset_index()
        .sort_values(value_col)
    )
    positions = range(len(means))
    fig, ax = plt.subplots(figsize=(15, 5))
    ax.bar(positions, means[value_col])
    ax.set_xticks(positions)
    ax.set_xticklabels(means[group_col], rotation=45)
    ax.set_ylabel('Mean')
    st.pyplot(fig)
    plt.close(fig)


def run():
    """Render the 'House Price Prediction - EDA' Streamlit page.

    Reads ``image.jpeg`` and ``USA_Housing.csv`` relative to the current
    working directory, shows the renamed dataset, and draws scatter and
    mean-bar charts for income, house age, rooms and population against
    derived price-range and room-count categories.
    """
    # Title
    st.title('House Price Prediction - EDA')
    # The context manager releases the image file handle once it is rendered.
    with Image.open('image.jpeg') as image:
        st.image(image, caption='US Suburbs')
    st.markdown('---')

    st.subheader('Dataset')
    df = pd.read_csv('USA_Housing.csv').rename(columns=_SHORT_NAMES)
    st.dataframe(df)
    st.markdown('---')

    st.subheader('Area Income')
    _scatter(df, 'Income', 'Price')
    st.markdown('---')

    st.subheader('House Age')
    # Work on a copy so the derived category columns never leak into df.
    eda = df.copy()
    _assign_bands(
        eda, 'Price', 'Pricerange',
        default='Very High',
        bands=_PRICE_BANDS,
        floor=(0.75e+06, 'Very Low'),
    )
    _mean_bar(eda, 'Pricerange', 'Age')
    st.markdown('---')

    st.subheader('Number of Rooms')
    _mean_bar(eda, 'Pricerange', 'Rooms')
    st.markdown('---')

    st.subheader('Area Population')
    _mean_bar(eda, 'Pricerange', 'Population')
    st.markdown('---')

    st.subheader('Connection Between Features')
    _scatter(eda, "Income", "Rooms", 'Pricerange')
    _scatter(eda, "Income", "Population", 'Pricerange')

    _assign_bands(
        eda, 'Rooms', 'Roomscat',
        default=9,
        bands=_ROOM_BANDS,
        floor=(3, 2),
    )
    _mean_bar(eda, 'Roomscat', 'Age')
# Render the page only when this file is executed as the entry script.
if __name__ == "__main__":
    run()