"""Streamlit page that shows exploratory charts for the USA housing dataset."""

import streamlit as st
from PIL import Image
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

st.set_page_config(
    page_title='Model EDA',
    layout='wide',
    initial_sidebar_state='expanded'
)

# (lower, upper, label) bands, lowest band first. Both edges are inclusive;
# a value sitting exactly on a shared edge belongs to the LOWER band.
_PRICE_BANDS = (
    (0.75e+06, 1e+06, 'Low'),
    (1e+06, 1.25e+06, 'Below Average'),
    (1.25e+06, 1.5e+06, 'Above Average'),
    (1.5e+06, 1.75e+06, 'High'),
)
_ROOM_BANDS = tuple((rooms, rooms + 1, rooms) for rooms in range(3, 9))


def _categorize(values, below_label, bands, default):
    """Map each entry of a numeric Series to a band label.

    Args:
        values: numeric pandas Series to classify.
        below_label: label for entries strictly below the first band's
            lower edge.
        bands: iterable of ``(lower, upper, label)`` with inclusive edges,
            ordered from lowest to highest.
        default: label for everything that matches no band (values above the
            last band and missing values).

    Returns:
        A NumPy array of labels, positionally aligned with ``values``.
    """
    conditions = [(values < bands[0][0]).to_numpy()]
    choices = [below_label]
    for lower, upper, label in bands:
        conditions.append(values.between(lower, upper).to_numpy())
        choices.append(label)
    # np.select takes the first matching condition, so listing the bands from
    # lowest to highest makes the lower band win on a shared edge.
    return np.select(conditions, choices, default=default)


def barc(df, x, y):
    """Render a bar chart of the mean of column ``y`` per group of ``x``.

    Bars are ordered by ascending mean and labelled with the group values.

    Args:
        df: DataFrame containing both columns.
        x: name of the column to group by.
        y: name of the numeric column to average.
    """
    # Average only ``y``: averaging every column raises on non-numeric
    # columns (e.g. 'Pricerange') under pandas >= 2.0.
    means = df.groupby(x)[y].mean().reset_index().sort_values(y)
    positions = range(len(means))

    fig, ax = plt.subplots(figsize=(15, 5))
    ax.bar(positions, means[y])
    ax.set_xticks(positions)
    ax.set_xticklabels(means[x], rotation=45)
    ax.set_ylabel('Mean')
    st.pyplot(fig)
    # Release the figure so reruns of the script do not accumulate figures.
    plt.close(fig)


def _scatter(df, x, y, hue):
    """Render a scatter plot of ``y`` against ``x`` coloured by ``hue``."""
    fig, ax = plt.subplots(figsize=(15, 5))
    # Axes-level plot so the drawing lands on the figure we hand to Streamlit;
    # a figure-level seaborn call would draw on a figure of its own.
    sns.scatterplot(data=df, x=x, y=y, hue=hue, ax=ax)
    st.pyplot(fig)
    plt.close(fig)


def run():
    """Build the EDA page: raw data, price-range bar charts and scatter plots.

    Reads 'image.jpeg' and 'USA_Housing.csv' from the working directory. The
    CSV is expected to provide the columns 'Price', 'Income', 'Age', 'Rooms'
    and 'Population', which are the ones this page reads.
    """
    # Title
    st.title('House Price Prediction - EDA')
    with Image.open('image.jpeg') as image:
        st.image(image, caption='US Suburbs')
    st.markdown('---')

    df = pd.read_csv('USA_Housing.csv')
    st.dataframe(df)
    st.markdown('---')

    st.subheader('Area Income')
    eda = df.copy()
    eda['Pricerange'] = _categorize(
        eda['Price'], 'Very Low', _PRICE_BANDS, 'Very High'
    )
    st.dataframe(eda)
    barc(eda, 'Pricerange', 'Income')
    st.markdown('---')

    st.subheader('House Age')
    barc(eda, 'Pricerange', 'Age')
    st.markdown('---')

    st.subheader('Number of Rooms')
    barc(eda, 'Pricerange', 'Rooms')
    st.markdown('---')

    st.subheader('Area Population')
    barc(eda, 'Pricerange', 'Population')
    st.markdown('---')

    st.subheader('Connection Between Features')
    _scatter(eda, 'Income', 'Rooms', 'Pricerange')
    _scatter(eda, 'Income', 'Population', 'Pricerange')

    # Whole-number room category: fewer than 3 rooms -> 2, 3 to 9 rooms ->
    # 3..8 (a whole-number count falls in the lower band), anything else -> 9.
    eda['Roomscat'] = _categorize(eda['Rooms'], 2, _ROOM_BANDS, 9)
    barc(eda, 'Roomscat', 'Age')


if __name__ == '__main__':
    run()