"""Streamlit page showing exploratory data analysis for the USA housing dataset."""

import streamlit as st
from PIL import Image
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Page-level Streamlit settings; this runs at import time, ahead of every
# other Streamlit call in this file.
st.set_page_config(
    layout='wide',
    initial_sidebar_state='expanded',
    page_title='Model EDA',
)

# Ascending (exclusive upper bound, label) pairs for the house-price bands.
# Prices at or above the last bound get the 'Very High' label.
_PRICE_BANDS = (
    (0.75e+06, 'Very Low'),
    (1e+06, 'Low'),
    (1.25e+06, 'Below Average'),
    (1.5e+06, 'Above Average'),
    (1.75e+06, 'High'),
)

# Room counts are floored to an integer bucket and clamped to 2..9:
# anything below 3 is bucket 2, anything at or above 9 is bucket 9.
_ROOM_BANDS = tuple((upper, upper - 1) for upper in range(3, 10))


def _label_bands(values, bands, top_label):
    """Label each value with the band it falls into.

    Bands are half-open ``[previous_upper, upper)`` intervals, so a value
    sitting exactly on a boundary always belongs to the higher band instead
    of depending on the order in which overlapping masks are applied.

    Args:
        values: numeric ``pd.Series`` to classify.
        bands: iterable of ``(upper_bound, label)`` pairs.
        top_label: label for values at or above the largest upper bound.
            Missing values also keep this label, because a NaN never
            compares as smaller than a bound.

    Returns:
        A ``pd.Series`` of labels sharing the index of ``values``.
    """
    labels = pd.Series(top_label, index=values.index)
    # Walk from the highest bound down so the tightest matching band is the
    # last one written for each value.
    for upper, label in sorted(bands, key=lambda band: band[0], reverse=True):
        labels.loc[values < upper] = label
    return labels


def _mean_by_group(df, x, y):
    """Return the mean of column ``y`` per value of ``x``, sorted by that mean.

    Only ``y`` is aggregated. Averaging the whole frame fails on pandas 2.x
    as soon as it holds a text column.
    """
    means = df.groupby(x)[y].mean().reset_index()
    return means.sort_values(y)


def _render(fig):
    """Hand ``fig`` to Streamlit, then close it.

    The script is executed again on every interaction, so figures that are
    never closed pile up in pyplot's figure registry.
    """
    try:
        st.pyplot(fig)
    finally:
        plt.close(fig)


def _scatter(df, x, y, hue=None):
    """Draw a scatter plot of ``y`` against ``x``, optionally coloured by ``hue``."""
    fig, ax = plt.subplots(figsize=(15, 10))
    sns.scatterplot(data=df, x=x, y=y, hue=hue, ax=ax)
    _render(fig)


def _mean_bar(df, x, y):
    """Draw a bar chart of the mean of ``y`` for each ``x`` group, ascending."""
    means = _mean_by_group(df, x, y)
    positions = list(range(len(means)))
    fig, ax = plt.subplots(figsize=(15, 5))
    ax.bar(positions, means[y])
    ax.set_xticks(positions)
    ax.set_xticklabels(means[x], rotation=45)
    ax.set_ylabel('Mean')
    _render(fig)


def run():
    """Render the EDA page.

    Shows the header image and the raw dataset, then charts relating price
    to income, house age, number of rooms and population. Reads
    ``image.jpeg`` and ``USA_Housing.csv`` from the current working
    directory.
    """
    # Title
    st.title('House Price Prediction - EDA')

    # Use a context manager so the image file handle is released.
    with Image.open('image.jpeg') as image:
        st.image(image, caption='US Suburbs')

    st.markdown('---')

    st.subheader('Dataset')

    df = pd.read_csv('USA_Housing.csv')
    # Shorten the verbose source column names for display and plotting.
    df = df.rename(columns={
        'Avg. Area Income': 'Income',
        'Avg. Area House Age': 'Age',
        'Avg. Area Number of Rooms': 'Rooms',
        'Avg. Area Number of Bedrooms': 'Bedrooms',
        'Area Population': 'Population',
    })
    st.dataframe(df)

    st.markdown('---')

    st.subheader('Area Income')

    _scatter(df, 'Income', 'Price')

    st.markdown('---')

    st.subheader('House Age')

    # Work on a copy so the derived category columns stay out of `df`.
    eda = df.copy()
    eda['Pricerange'] = _label_bands(eda['Price'], _PRICE_BANDS, 'Very High')

    _mean_bar(eda, 'Pricerange', 'Age')

    st.markdown('---')

    st.subheader('Number of Rooms')

    _mean_bar(eda, 'Pricerange', 'Rooms')

    st.markdown('---')

    st.subheader('Area Population')

    _mean_bar(eda, 'Pricerange', 'Population')

    st.markdown('---')

    st.subheader('Connection Between Features')

    _scatter(eda, "Income", "Rooms", 'Pricerange')

    _scatter(eda, "Income", "Population", 'Pricerange')

    eda['Roomscat'] = _label_bands(eda['Rooms'], _ROOM_BANDS, 9)

    _mean_bar(eda, 'Roomscat', 'Age')

# Render the page only when this file is executed as a script, not when it
# is imported by another module.
if __name__ == "__main__":
    run()