ardifarizky committed on
Commit
15a269a
1 Parent(s): e421ede

Update eda.py

Browse files
Files changed (1) hide show
  1. eda.py +83 -68
eda.py CHANGED
@@ -24,88 +24,103 @@ st.markdown(hide_streamlit_style, unsafe_allow_html=True)
24
 
25
 
26
  def run():
27
- with st.sidebar:
28
- st.title('EDA')
29
-
30
- d = pd.read_csv('hotel_bookings.csv')
31
-
32
- fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(15, 10))
33
 
34
- sns.histplot(data=d, x='lead_time', hue='is_canceled',
35
- kde=True, ax=ax[0][0], palette='Set1').set_title("distribution of Lead Time")
36
-
37
- sns.histplot(data=d, x='booking_changes', hue='is_canceled',
38
- ax=ax[0][1], palette='Set1').set_title("distribution of Booking Changes")
 
39
 
40
- sns.histplot(data=d, x='deposit_type', hue='is_canceled',
41
- ax=ax[1][0], palette='Set1').set_title("distribution of Deposit Type")
42
 
43
- plt.tight_layout()
44
- st.pyplot(fig)
45
-
46
- booking_counts = d.groupby(['arrival_date_year', 'arrival_date_month', 'arrival_date_week_number', 'hotel']).size().reset_index(name='booking_count')
47
-
48
- pivot_table = booking_counts.pivot_table(index=['arrival_date_month', 'arrival_date_week_number'], columns=['arrival_date_year', 'hotel'], values='booking_count', fill_value=0)
49
 
50
- plt.figure(figsize=(12, 10))
51
- pivot_table.plot(kind='line')
52
- plt.title('Seasonal Booking Trends')
53
- plt.xlabel('Month and Week Number')
54
- plt.ylabel('Booking Count')
55
- plt.legend(title='Hotel Type')
56
- plt.xticks(rotation=45)
57
- plt.tight_layout()
58
- st.pyplot()
59
 
60
- demographics_counts = d[['babies', 'adults', 'children']].sum()
61
-
62
- # creating the pie chart
63
- plt.figure(figsize=(8, 8))
64
- plt.pie(demographics_counts, labels=demographics_counts.index, autopct='%1.1f%%', startangle=140)
65
- plt.title('Distribution of Guest Demographics')
66
- plt.axis('equal')
67
-
68
- st.pyplot()
69
 
70
-
71
- fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(12, 10))
72
 
73
- sns.histplot(data=d, x='lead_time', hue='hotel', multiple='stack', bins=20, ax=ax[0, 0], palette='Set1')
74
- ax[0, 0].set_title("Booking Behavior by Hotel Type (Lead Time)")
75
 
76
- sns.barplot(data=d, x='hotel', y='is_canceled', ax=ax[0, 1], palette='Set1')
77
- ax[0, 1].set_title("Cancellation Rate by Hotel Type")
 
 
 
 
78
 
79
- sns.countplot(data=d, x='booking_changes', hue='hotel', ax=ax[1, 0], palette='Set1')
80
- ax[1, 0].set_title("Booking Changes by Hotel Type")
 
 
 
 
 
 
 
 
 
81
 
82
- sns.countplot(data=d, x='hotel', ax=ax[1, 1], palette='Set1')
83
- ax[1, 1].set_title("Total Bookings by Hotel Type")
 
 
 
84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
- plt.tight_layout()
87
 
88
- st.pyplot(fig)
 
 
 
 
 
 
89
 
90
- plt.figure(figsize=(12, 6))
91
- sns.countplot(data=d, x='market_segment', palette='Set3')
92
- plt.title('Distribution of Market Segmentation')
93
- plt.xlabel('Market Segment')
94
- plt.ylabel('Count')
95
- plt.xticks(rotation=45, ha='right')
96
- plt.tight_layout()
97
-
98
- st.pyplot()
99
-
100
- # create a count plot for distribution channels
101
- plt.figure(figsize=(10, 6))
102
- sns.countplot(data=d, x='distribution_channel', palette='Set2')
103
- plt.title('Distribution of Distribution Channels')
104
- plt.xlabel('Distribution Channel')
105
- plt.ylabel('Count')
106
- plt.tight_layout()
107
-
108
- st.pyplot()
109
 
110
 
111
  if __name__ == '__main__':
 
24
 
25
 
26
  def run():
27
+
28
+ st.title('EDA')
 
 
 
 
29
 
30
+ with st.sidebar:
31
+
32
+ st.markdown('
33
+ - Bookings made well in advance, such as 250 days before the stay, often face cancellations. This suggests the need for flexible cancellation policies.
34
+
35
+ - Despite "Non Refundable" deposits, a significant number of cancellations occur. Unforeseen events may be causing these cancellations.
36
 
37
+ - April and May witness increased hotel bookings. This highlights the potential to optimize pricing and resources during these peak demand periods.
 
38
 
39
+ - The difficulty in identifying strong connections between columns is due to the unequal data distribution between city and resort hotels. Caution is advised when interpreting findings.
 
 
 
 
 
40
 
41
+ - Online Travel agents are favored for bookings. We can Strengthen partnerships with Online Travel agents, offering them exclusive deals or promotions to encourage more bookings through this channel. Focus marketing efforts on promoting these partnerships to attract a broader customer base. By implementing these solutions, hotels can adapt to changing customer preferences and market dynamics, ultimately enhancing customer satisfaction and revenue generation.
42
+ ')
 
 
 
 
 
 
 
43
 
44
+
45
+ d = pd.read_csv('hotel_bookings.csv')
46
+
47
+ fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(15, 10))
48
+
49
+ sns.histplot(data=d, x='lead_time', hue='is_canceled',
50
+ kde=True, ax=ax[0][0], palette='Set1').set_title("distribution of Lead Time")
 
 
51
 
52
+ sns.histplot(data=d, x='booking_changes', hue='is_canceled',
53
+ ax=ax[0][1], palette='Set1').set_title("distribution of Booking Changes")
54
 
55
+ sns.histplot(data=d, x='deposit_type', hue='is_canceled',
56
+ ax=ax[1][0], palette='Set1').set_title("distribution of Deposit Type")
57
 
58
+ plt.tight_layout()
59
+ st.pyplot(fig)
60
+
61
+ booking_counts = d.groupby(['arrival_date_year', 'arrival_date_month', 'arrival_date_week_number', 'hotel']).size().reset_index(name='booking_count')
62
+
63
+ pivot_table = booking_counts.pivot_table(index=['arrival_date_month', 'arrival_date_week_number'], columns=['arrival_date_year', 'hotel'], values='booking_count', fill_value=0)
64
 
65
+ plt.figure(figsize=(12, 10))
66
+ pivot_table.plot(kind='line')
67
+ plt.title('Seasonal Booking Trends')
68
+ plt.xlabel('Month and Week Number')
69
+ plt.ylabel('Booking Count')
70
+ plt.legend(title='Hotel Type')
71
+ plt.xticks(rotation=45)
72
+ plt.tight_layout()
73
+ st.pyplot()
74
+
75
+ demographics_counts = d[['babies', 'adults', 'children']].sum()
76
 
77
+ # creating the pie chart
78
+ plt.figure(figsize=(8, 8))
79
+ plt.pie(demographics_counts, labels=demographics_counts.index, autopct='%1.1f%%', startangle=140)
80
+ plt.title('Distribution of Guest Demographics')
81
+ plt.axis('equal')
82
 
83
+ st.pyplot()
84
+
85
+
86
+ fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(12, 10))
87
+
88
+ sns.histplot(data=d, x='lead_time', hue='hotel', multiple='stack', bins=20, ax=ax[0, 0], palette='Set1')
89
+ ax[0, 0].set_title("Booking Behavior by Hotel Type (Lead Time)")
90
+
91
+ sns.barplot(data=d, x='hotel', y='is_canceled', ax=ax[0, 1], palette='Set1')
92
+ ax[0, 1].set_title("Cancellation Rate by Hotel Type")
93
+
94
+ sns.countplot(data=d, x='booking_changes', hue='hotel', ax=ax[1, 0], palette='Set1')
95
+ ax[1, 0].set_title("Booking Changes by Hotel Type")
96
+
97
+ sns.countplot(data=d, x='hotel', ax=ax[1, 1], palette='Set1')
98
+ ax[1, 1].set_title("Total Bookings by Hotel Type")
99
+
100
+
101
+ plt.tight_layout()
102
+
103
+ st.pyplot(fig)
104
+
105
+ plt.figure(figsize=(12, 6))
106
+ sns.countplot(data=d, x='market_segment', palette='Set3')
107
+ plt.title('Distribution of Market Segmentation')
108
+ plt.xlabel('Market Segment')
109
+ plt.ylabel('Count')
110
+ plt.xticks(rotation=45, ha='right')
111
+ plt.tight_layout()
112
 
113
+ st.pyplot()
114
 
115
+ # create a count plot for distribution channels
116
+ plt.figure(figsize=(10, 6))
117
+ sns.countplot(data=d, x='distribution_channel', palette='Set2')
118
+ plt.title('Distribution of Distribution Channels')
119
+ plt.xlabel('Distribution Channel')
120
+ plt.ylabel('Count')
121
+ plt.tight_layout()
122
 
123
+ st.pyplot()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
 
125
 
126
  if __name__ == '__main__':