File size: 5,141 Bytes
1c58500
 
 
 
 
 
 
 
44ff538
1c58500
 
 
c23995e
1c58500
 
 
 
 
 
 
 
 
 
 
 
 
15a269a
42fec1e
15a269a
 
 
22ca2d7
7204750
 
e421ede
b711091
f7e19bd
b711091
 
 
 
 
 
 
 
 
 
 
f7e19bd
b711091
f7e19bd
b711091
 
 
 
 
 
 
 
 
 
 
e421ede
b711091
15a269a
b711091
 
 
 
 
 
 
 
 
 
 
 
 
e421ede
b711091
 
e421ede
b711091
 
e421ede
b711091
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42fec1e
7204750
42fec1e
cb6e1e6
bd2b07b
69b4839
bd2b07b
69b4839
bd2b07b
69b4839
bd2b07b
69b4839
bd2b07b
 
1c58500
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from PIL import Image

# Page-level settings: tab title 'EDA', wide layout, sidebar expanded on load.
st.set_page_config(page_title='EDA', layout='wide', initial_sidebar_state='expanded')

# Switch off the 'deprecation.showPyplotGlobalUse' notice.
st.set_option('deprecation.showPyplotGlobalUse', False)

# CSS snippet that hides the #MainMenu element and the page footer.
hide_streamlit_style = """
            <style>
            #MainMenu {visibility: hidden;}
            footer {visibility: hidden;}
            </style>
            """
# unsafe_allow_html=True is needed so the <style> tag is injected as HTML
# instead of being shown as text.
st.markdown(hide_streamlit_style, unsafe_allow_html=True)




def _render(fig: "plt.Figure") -> None:
    """Draw *fig* on the Streamlit page, then close it.

    Closing releases the figure from pyplot's registry so that repeated
    script reruns do not accumulate open figures.
    """
    st.pyplot(fig)
    plt.close(fig)


def _plot_cancellation_distributions(d: pd.DataFrame) -> "plt.Figure":
    """Histograms of lead time, booking changes and deposit type, coloured by `is_canceled`."""
    fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(15, 10))

    sns.histplot(data=d, x='lead_time', hue='is_canceled',
                 kde=True, ax=ax[0][0], palette='Set1').set_title("distribution of Lead Time")

    sns.histplot(data=d, x='booking_changes', hue='is_canceled',
                 ax=ax[0][1], palette='Set1').set_title("distribution of Booking Changes")

    sns.histplot(data=d, x='deposit_type', hue='is_canceled',
                 ax=ax[1][0], palette='Set1').set_title("distribution of Deposit Type")

    # Only three of the four grid cells are used; hide the empty frame.
    ax[1][1].set_axis_off()

    fig.tight_layout()
    return fig


def _plot_seasonal_trends(d: pd.DataFrame) -> "plt.Figure":
    """Line chart of booking counts per (month, week), one line per (year, hotel)."""
    booking_counts = (
        d.groupby(['arrival_date_year', 'arrival_date_month',
                   'arrival_date_week_number', 'hotel'])
        .size()
        .reset_index(name='booking_count')
    )

    # NOTE(review): if arrival_date_month holds month names, this index sorts
    # alphabetically rather than chronologically -- confirm against the CSV.
    pivot_table = booking_counts.pivot_table(
        index=['arrival_date_month', 'arrival_date_week_number'],
        columns=['arrival_date_year', 'hotel'],
        values='booking_count',
        fill_value=0,
    )

    # DataFrame.plot opens its own figure unless an Axes is supplied, so the
    # Axes is passed explicitly to make the requested figsize take effect.
    fig, ax = plt.subplots(figsize=(12, 10))
    pivot_table.plot(kind='line', ax=ax)
    ax.set_title('Seasonal Booking Trends')
    ax.set_xlabel('Month and Week Number')
    ax.set_ylabel('Booking Count')
    ax.legend(title='Hotel Type')
    ax.tick_params(axis='x', labelrotation=45)
    fig.tight_layout()
    return fig


def _plot_guest_demographics(d: pd.DataFrame) -> "plt.Figure":
    """Pie chart of the column totals of `babies`, `adults` and `children`."""
    demographics_counts = d[['babies', 'adults', 'children']].sum()

    fig, ax = plt.subplots(figsize=(8, 8))
    ax.pie(demographics_counts, labels=demographics_counts.index,
           autopct='%1.1f%%', startangle=140)
    ax.set_title('Distribution of Guest Demographics')
    ax.axis('equal')
    return fig


def _plot_hotel_type_comparison(d: pd.DataFrame) -> "plt.Figure":
    """2x2 grid comparing lead time, cancellations, booking changes and totals by `hotel`."""
    fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(12, 10))

    sns.histplot(data=d, x='lead_time', hue='hotel', multiple='stack', bins=20,
                 ax=ax[0, 0], palette='Set1')
    ax[0, 0].set_title("Booking Behavior by Hotel Type (Lead Time)")

    sns.barplot(data=d, x='hotel', y='is_canceled', ax=ax[0, 1], palette='Set1')
    ax[0, 1].set_title("Cancellation Rate by Hotel Type")

    sns.countplot(data=d, x='booking_changes', hue='hotel', ax=ax[1, 0], palette='Set1')
    ax[1, 0].set_title("Booking Changes by Hotel Type")

    sns.countplot(data=d, x='hotel', ax=ax[1, 1], palette='Set1')
    ax[1, 1].set_title("Total Bookings by Hotel Type")

    fig.tight_layout()
    return fig


def _plot_category_counts(d: pd.DataFrame, column: str, title: str, xlabel: str,
                          palette: str, figsize: tuple, rotate_labels: bool = False) -> "plt.Figure":
    """Count plot of one categorical column, optionally with slanted x tick labels."""
    fig, ax = plt.subplots(figsize=figsize)
    sns.countplot(data=d, x=column, palette=palette, ax=ax)
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Count')
    if rotate_labels:
        plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
    fig.tight_layout()
    return fig


def run() -> None:
    """Render the EDA page.

    Reads 'hotel_bookings.csv' from the working directory, draws six charts in
    the left column and shows a text area with written findings in the right
    column. Each chart is built on its own explicit figure and closed after
    rendering, rather than relying on pyplot's implicit current figure.
    """
    st.title('EDA')

    d = pd.read_csv('hotel_bookings.csv')

    col1, col2 = st.columns(2)

    with col1:
        _render(_plot_cancellation_distributions(d))
        _render(_plot_seasonal_trends(d))
        _render(_plot_guest_demographics(d))
        _render(_plot_hotel_type_comparison(d))
        _render(_plot_category_counts(
            d, 'market_segment',
            title='Distribution of Market Segmentation',
            xlabel='Market Segment',
            palette='Set3',
            figsize=(12, 6),
            rotate_labels=True,
        ))
        _render(_plot_category_counts(
            d, 'distribution_channel',
            title='Distribution of Distribution Channels',
            xlabel='Distribution Channel',
            palette='Set2',
            figsize=(10, 6),
        ))

    with col2:

        st.text_area('About Exploratory Data Analysis', 
        '''- Bookings made well in advance, such as 250 days before the stay, often face cancellations. This suggests the need for flexible cancellation policies.
        
        - Despite "Non Refundable" deposits, a significant number of cancellations occur. Unforeseen events may be causing these cancellations.
        
        - April and May witness increased hotel bookings. This highlights the potential to optimize pricing and resources during these peak demand periods.
        
        - The difficulty in identifying strong connections between columns is due to the unequal data distribution between city and resort hotels. Caution is advised when interpreting findings.
        
        - Online Travel agents are favored for bookings. We can Strengthen partnerships with Online Travel agents, offering them exclusive deals or promotions to encourage more bookings through this channel. Focus marketing efforts on promoting these partnerships to attract a broader customer base. By implementing these solutions, hotels can adapt to changing customer preferences and market dynamics, ultimately enhancing customer satisfaction and revenue generation.'''
        , height = 1000)

# Build the page only when this file is executed as the main script.
if __name__ == "__main__":
    run()