File size: 5,104 Bytes
1c58500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c378d97
f7e19bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1454e13
1c58500
c378d97
4c37034
1454e13
c378d97
4c37034
1454e13
c378d97
4c37034
1454e13
c378d97
4c37034
1454e13
 
 
 
f7e19bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1c58500
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
import streamlit as st
from PIL import Image

# Configure the browser tab title, page layout and sidebar state.
st.set_page_config(page_title='FIFA 2022', layout='wide', initial_sidebar_state='expanded')

# CSS injected into the page to hide the elements matched by `#MainMenu`
# and `footer`.
hide_streamlit_style = """
            <style>
            #MainMenu {visibility: hidden;}
            footer {visibility: hidden;}
            </style>
            """
# unsafe_allow_html is needed so the <style> tag is emitted as raw HTML.
st.markdown(hide_streamlit_style, unsafe_allow_html=True)




def _render(fig):
    """Send *fig* to the Streamlit page, then close it."""
    st.pyplot(fig)
    # Closing keeps pyplot's global figure registry from growing each time
    # the script is executed again.
    plt.close(fig)


def _correlation_figure(d):
    """Return a heatmap of the pairwise correlations of *d*'s numeric columns."""
    # Correlation is only defined for numeric data; on pandas >= 2.0
    # DataFrame.corr() raises when non-numeric columns are present, so they
    # are dropped explicitly first.
    corr = d.select_dtypes(include='number').corr()
    # Hide the upper triangle (diagonal included): the matrix is symmetric,
    # so those cells only repeat the lower triangle.
    mask = np.triu(np.ones_like(corr, dtype=bool))

    fig, ax = plt.subplots(figsize=(12, 9))
    cmap = sns.diverging_palette(220, 10, as_cmap=True)
    sns.heatmap(corr, mask=mask, cmap=cmap, vmax=1, center=0,
                square=True, linewidths=.5, cbar_kws={"shrink": .5}, ax=ax)
    ax.set_title('Data Correlation')
    return fig


def _cancellation_figure(d):
    """Return histograms of three booking columns, split by `is_canceled`."""
    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(15, 10))

    sns.histplot(data=d, x='lead_time', hue='is_canceled',
                 kde=True, ax=axes[0, 0], palette='Set1')
    axes[0, 0].set_title("distribution of Lead Time")

    sns.histplot(data=d, x='booking_changes', hue='is_canceled',
                 ax=axes[0, 1], palette='Set1')
    axes[0, 1].set_title("distribution of Booking Changes")

    sns.histplot(data=d, x='deposit_type', hue='is_canceled',
                 ax=axes[1, 0], palette='Set1')
    axes[1, 0].set_title("distribution of Deposit Type")

    # Only three of the four grid cells are used; hide the empty frame.
    axes[1, 1].axis('off')

    fig.tight_layout()
    return fig


def _seasonal_figure(d):
    """Return a line chart of booking counts per (month, week), by year and hotel."""
    booking_counts = (
        d.groupby(['arrival_date_year', 'arrival_date_month',
                   'arrival_date_week_number', 'hotel'])
        .size()
        .reset_index(name='booking_count')
    )
    # NOTE(review): if `arrival_date_month` holds month names, the pivot index
    # sorts them alphabetically rather than chronologically -- confirm against
    # the CSV.
    pivot_table = booking_counts.pivot_table(
        index=['arrival_date_month', 'arrival_date_week_number'],
        columns=['arrival_date_year', 'hotel'],
        values='booking_count',
        fill_value=0,
    )

    fig, ax = plt.subplots(figsize=(12, 10))
    # Pass `ax` explicitly: without it DataFrame.plot draws on a figure of its
    # own, which would never reach st.pyplot.
    pivot_table.plot(kind='line', ax=ax)
    ax.set_title('Seasonal Booking Trends')
    ax.set_xlabel('Month and Week Number')
    ax.set_ylabel('Booking Count')
    ax.legend(title='Hotel Type')
    ax.tick_params(axis='x', labelrotation=45)
    fig.tight_layout()
    return fig


def _demographics_figure(d):
    """Return a pie chart of the column totals of babies, adults and children."""
    demographics_counts = d[['babies', 'adults', 'children']].sum()

    fig, ax = plt.subplots(figsize=(8, 8))
    ax.pie(demographics_counts, labels=demographics_counts.index,
           autopct='%1.1f%%', startangle=140)
    ax.set_title('Distribution of Guest Demographics')
    ax.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
    return fig


def _hotel_figure(d):
    """Return a 2x2 grid comparing booking behaviour across `hotel` values."""
    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 10))

    sns.histplot(data=d, x='lead_time', hue='hotel', multiple='stack',
                 bins=20, ax=axes[0, 0], palette='Set1')
    axes[0, 0].set_title("Booking Behavior by Hotel Type (Lead Time)")

    sns.barplot(data=d, x='hotel', y='is_canceled', ax=axes[0, 1], palette='Set1')
    axes[0, 1].set_title("Cancellation Rate by Hotel Type")

    sns.countplot(data=d, x='booking_changes', hue='hotel', ax=axes[1, 0], palette='Set1')
    axes[1, 0].set_title("Booking Changes by Hotel Type")

    sns.countplot(data=d, x='hotel', ax=axes[1, 1], palette='Set1')
    axes[1, 1].set_title("Total Bookings by Hotel Type")

    fig.tight_layout()
    return fig


def _category_count_figure(d, column, title, xlabel, palette, figsize,
                           rotate_labels=False):
    """Return a count plot of *column*; optionally slant the x tick labels."""
    fig, ax = plt.subplots(figsize=figsize)
    sns.countplot(data=d, x=column, palette=palette, ax=ax)
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Count')
    if rotate_labels:
        plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
    fig.tight_layout()
    return fig


def run():
    """Render the exploratory charts for `hotel_bookings.csv` on the page.

    Reads the CSV from the current working directory, builds one matplotlib
    figure per chart and sends each of them to Streamlit in order:
    correlation heatmap, cancellation histograms, seasonal booking trends,
    guest demographics, per-hotel comparison, market segments and
    distribution channels.

    Raises:
        FileNotFoundError: if `hotel_bookings.csv` is not present.
    """
    # NOTE(review): this title does not match the hotel-bookings data loaded
    # below -- looks like a leftover from another app; confirm the wording.
    st.title('Heart Failure Prediction')

    d = pd.read_csv('hotel_bookings.csv')

    # Applies seaborn's 'white' style to every figure created afterwards.
    sns.set(style='white')

    # Each chart is built on its own Figure and that exact Figure is rendered,
    # so no section can accidentally re-display an earlier chart.
    _render(_correlation_figure(d))
    _render(_cancellation_figure(d))
    _render(_seasonal_figure(d))
    _render(_demographics_figure(d))
    _render(_hotel_figure(d))
    _render(_category_count_figure(
        d, 'market_segment', 'Distribution of Market Segmentation',
        'Market Segment', 'Set3', (12, 6), rotate_labels=True))
    _render(_category_count_figure(
        d, 'distribution_channel', 'Distribution of Distribution Channels',
        'Distribution Channel', 'Set2', (10, 6)))

# Build the page only when this file is executed as a script, not on import.
if __name__ == '__main__':
    run()