File size: 2,426 Bytes
bc9b26f
4f07a40
bc9b26f
 
 
 
 
 
0beeb88
bc9b26f
 
 
 
 
 
0beeb88
bc9b26f
4f07a40
 
bc9b26f
 
 
0beeb88
 
bc9b26f
9097c51
 
 
 
3d78478
 
 
 
 
 
 
 
 
9097c51
 
 
0beeb88
9097c51
 
 
0beeb88
9097c51
 
 
 
 
 
 
 
 
 
 
0beeb88
9097c51
0beeb88
 
 
 
 
 
 
 
 
 
 
bc9b26f
 
0beeb88
bc9b26f
 
 
0beeb88
bc9b26f
 
0beeb88
 
 
 
 
 
 
 
 
 
bc9b26f
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import streamlit as st
from PIL import Image
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Page-wide Streamlit settings: page title 'Model EDA', wide layout,
# and the sidebar expanded initially.
st.set_page_config(
    initial_sidebar_state='expanded',
    layout='wide',
    page_title='Model EDA',
)

def _price_range_labels(prices):
    """Return a price-band label for every value in the Series *prices*.

    Bands (conditions are checked lowest band first, so a value sitting on a
    boundary shared by two ``between`` ranges gets the lower band):

    * ``< 750,000``                 -> 'Very Low'
    * ``750,000 .. 1,000,000``      -> 'Low'
    * ``> 1,000,000 .. 1,250,000``  -> 'Below Average'
    * ``> 1,250,000 .. 1,500,000``  -> 'Above Average'
    * ``> 1,500,000 .. 1,750,000``  -> 'High'
    * anything else (including missing prices) -> 'Very High'
    """
    conditions = [
        prices < 0.75e+06,
        prices.between(0.75e+06, 1e+06),
        prices.between(1e+06, 1.25e+06),
        prices.between(1.25e+06, 1.5e+06),
        prices.between(1.5e+06, 1.75e+06),
    ]
    labels = ['Very Low', 'Low', 'Below Average', 'Above Average', 'High']
    # np.select takes the first matching condition, which reproduces the
    # precedence of the overlapping inclusive ranges.
    return np.select(conditions, labels, default='Very High')


def _room_count_bins(rooms):
    """Return an integer room-count bin for every value in the Series *rooms*.

    Values below 3 map to 2, values in ``[3, 4]`` map to 3, then each
    ``(k, k + 1]`` interval maps to ``k`` up to 8; anything else (above 9 or
    missing) maps to 9.
    """
    lower_bounds = list(range(3, 9))
    conditions = [rooms < 3]
    conditions += [rooms.between(low, low + 1) for low in lower_bounds]
    # First match wins, so a value on a shared boundary gets the lower bin.
    return np.select(conditions, [2] + lower_bounds, default=9)


def _plot_group_means(df, x, y):
    """Show a bar chart of the mean of column *y* for each value of column *x*.

    Bars are ordered by ascending mean and labelled with the group value.
    """
    # Average only the plotted column: averaging the whole frame fails on
    # text columns (such as 'Pricerange') with pandas >= 2.0.
    means = df.groupby(x)[y].mean().sort_values()
    positions = list(range(len(means)))

    fig, ax = plt.subplots(figsize=(15, 5))
    ax.bar(positions, means.to_numpy())
    ax.set_xticks(positions)
    ax.set_xticklabels(means.index, rotation=45)
    ax.set_ylabel('Mean')
    st.pyplot(fig)
    # pyplot keeps a reference to every figure until it is closed; without
    # this each rerun of the page would accumulate figures.
    plt.close(fig)


def _plot_scatter(df, x, y, hue):
    """Show a scatter plot of *y* against *x*, coloured by column *hue*."""
    fig, ax = plt.subplots(figsize=(15, 5))
    # Axes-level scatterplot draws on the figure we hand to Streamlit;
    # figure-level sns.relplot would create its own figure and leave this
    # one blank.
    sns.scatterplot(data=df, x=x, y=y, hue=hue, ax=ax)
    st.pyplot(fig)
    plt.close(fig)


def run():
    """Render the house-price EDA page.

    Reads 'image.jpeg' and 'USA_Housing.csv' from the current working
    directory. The CSV must provide the columns 'Price', 'Income', 'Age',
    'Rooms' and 'Population'. The page shows the raw data, the data with a
    derived 'Pricerange' column, per-price-range mean bar charts, two scatter
    plots and a mean-age chart per room-count bin.
    """
    # Title
    st.title('House Price Prediction - EDA')

    image = Image.open('image.jpeg')
    st.image(image, caption='US Suburbs')

    st.markdown('---')

    df = pd.read_csv('USA_Housing.csv')
    st.dataframe(df)

    st.markdown('---')

    st.subheader('Area Income')

    # Work on a copy so the derived columns never leak into the raw frame.
    eda = df.copy()
    eda['Pricerange'] = _price_range_labels(eda['Price'])
    st.dataframe(eda)

    _plot_group_means(eda, 'Pricerange', 'Income')

    st.markdown('---')

    st.subheader('House Age')

    _plot_group_means(eda, 'Pricerange', 'Age')

    st.markdown('---')

    st.subheader('Number of Rooms')

    _plot_group_means(eda, 'Pricerange', 'Rooms')

    st.markdown('---')

    st.subheader('Area Population')

    _plot_group_means(eda, 'Pricerange', 'Population')

    st.markdown('---')

    st.subheader('Connection Between Features')

    _plot_scatter(eda, 'Income', 'Rooms', 'Pricerange')
    _plot_scatter(eda, 'Income', 'Population', 'Pricerange')

    eda['Roomscat'] = _room_count_bins(eda['Rooms'])

    _plot_group_means(eda, 'Roomscat', 'Age')

# Call run() only when this file is executed as the main script.
if __name__ == '__main__':
    run()