ardifarizky committed on
Commit
e421ede
1 Parent(s): c23995e

Update eda.py

Browse files
Files changed (1) hide show
  1. eda.py +72 -72
eda.py CHANGED
@@ -24,88 +24,88 @@ st.markdown(hide_streamlit_style, unsafe_allow_html=True)
24
 
25
 
26
  def run():
 
 
27
 
28
- st.title('EDA')
29
-
30
- d = pd.read_csv('hotel_bookings.csv')
31
-
32
- fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(15, 10))
33
-
34
- sns.histplot(data=d, x='lead_time', hue='is_canceled',
35
- kde=True, ax=ax[0][0], palette='Set1').set_title("distribution of Lead Time")
36
-
37
- sns.histplot(data=d, x='booking_changes', hue='is_canceled',
38
- ax=ax[0][1], palette='Set1').set_title("distribution of Booking Changes")
39
 
40
- sns.histplot(data=d, x='deposit_type', hue='is_canceled',
41
- ax=ax[1][0], palette='Set1').set_title("distribution of Deposit Type")
42
 
43
- plt.tight_layout()
44
- st.pyplot(fig)
45
-
46
- booking_counts = d.groupby(['arrival_date_year', 'arrival_date_month', 'arrival_date_week_number', 'hotel']).size().reset_index(name='booking_count')
47
-
48
- pivot_table = booking_counts.pivot_table(index=['arrival_date_month', 'arrival_date_week_number'], columns=['arrival_date_year', 'hotel'], values='booking_count', fill_value=0)
 
 
 
 
 
49
 
50
- plt.figure(figsize=(12, 10))
51
- pivot_table.plot(kind='line')
52
- plt.title('Seasonal Booking Trends')
53
- plt.xlabel('Month and Week Number')
54
- plt.ylabel('Booking Count')
55
- plt.legend(title='Hotel Type')
56
- plt.xticks(rotation=45)
57
- plt.tight_layout()
58
- st.pyplot()
59
-
60
- demographics_counts = d[['babies', 'adults', 'children']].sum()
61
 
62
- # creating the pie chart
63
- plt.figure(figsize=(8, 8))
64
- plt.pie(demographics_counts, labels=demographics_counts.index, autopct='%1.1f%%', startangle=140)
65
- plt.title('Distribution of Guest Demographics')
66
- plt.axis('equal')
 
 
 
 
 
 
67
 
68
- st.pyplot()
69
-
70
 
71
- fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(12, 10))
72
-
73
- sns.histplot(data=d, x='lead_time', hue='hotel', multiple='stack', bins=20, ax=ax[0, 0], palette='Set1')
74
- ax[0, 0].set_title("Booking Behavior by Hotel Type (Lead Time)")
75
-
76
- sns.barplot(data=d, x='hotel', y='is_canceled', ax=ax[0, 1], palette='Set1')
77
- ax[0, 1].set_title("Cancellation Rate by Hotel Type")
78
-
79
- sns.countplot(data=d, x='booking_changes', hue='hotel', ax=ax[1, 0], palette='Set1')
80
- ax[1, 0].set_title("Booking Changes by Hotel Type")
81
-
82
- sns.countplot(data=d, x='hotel', ax=ax[1, 1], palette='Set1')
83
- ax[1, 1].set_title("Total Bookings by Hotel Type")
84
-
85
-
86
- plt.tight_layout()
87
-
88
- st.pyplot(fig)
89
-
90
- plt.figure(figsize=(12, 6))
91
- sns.countplot(data=d, x='market_segment', palette='Set3')
92
- plt.title('Distribution of Market Segmentation')
93
- plt.xlabel('Market Segment')
94
- plt.ylabel('Count')
95
- plt.xticks(rotation=45, ha='right')
96
- plt.tight_layout()
97
 
98
- st.pyplot()
 
99
 
100
- # create a count plot for distribution channels
101
- plt.figure(figsize=(10, 6))
102
- sns.countplot(data=d, x='distribution_channel', palette='Set2')
103
- plt.title('Distribution of Distribution Channels')
104
- plt.xlabel('Distribution Channel')
105
- plt.ylabel('Count')
106
- plt.tight_layout()
107
 
108
- st.pyplot()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
 
111
  if __name__ == '__main__':
 
24
 
25
 
26
  def run():
27
+ with st.sidebar:
28
+ st.title('EDA')
29
 
30
+ d = pd.read_csv('hotel_bookings.csv')
 
 
 
 
 
 
 
 
 
 
31
 
32
+ fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(15, 10))
 
33
 
34
+ sns.histplot(data=d, x='lead_time', hue='is_canceled',
35
+ kde=True, ax=ax[0][0], palette='Set1').set_title("distribution of Lead Time")
36
+
37
+ sns.histplot(data=d, x='booking_changes', hue='is_canceled',
38
+ ax=ax[0][1], palette='Set1').set_title("distribution of Booking Changes")
39
+
40
+ sns.histplot(data=d, x='deposit_type', hue='is_canceled',
41
+ ax=ax[1][0], palette='Set1').set_title("distribution of Deposit Type")
42
+
43
+ plt.tight_layout()
44
+ st.pyplot(fig)
45
 
46
+ booking_counts = d.groupby(['arrival_date_year', 'arrival_date_month', 'arrival_date_week_number', 'hotel']).size().reset_index(name='booking_count')
 
 
 
 
 
 
 
 
 
 
47
 
48
+ pivot_table = booking_counts.pivot_table(index=['arrival_date_month', 'arrival_date_week_number'], columns=['arrival_date_year', 'hotel'], values='booking_count', fill_value=0)
49
+
50
+ plt.figure(figsize=(12, 10))
51
+ pivot_table.plot(kind='line')
52
+ plt.title('Seasonal Booking Trends')
53
+ plt.xlabel('Month and Week Number')
54
+ plt.ylabel('Booking Count')
55
+ plt.legend(title='Hotel Type')
56
+ plt.xticks(rotation=45)
57
+ plt.tight_layout()
58
+ st.pyplot()
59
 
60
+ demographics_counts = d[['babies', 'adults', 'children']].sum()
 
61
 
62
+ # creating the pie chart
63
+ plt.figure(figsize=(8, 8))
64
+ plt.pie(demographics_counts, labels=demographics_counts.index, autopct='%1.1f%%', startangle=140)
65
+ plt.title('Distribution of Guest Demographics')
66
+ plt.axis('equal')
67
+
68
+ st.pyplot()
69
+
70
+
71
+ fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(12, 10))
72
+
73
+ sns.histplot(data=d, x='lead_time', hue='hotel', multiple='stack', bins=20, ax=ax[0, 0], palette='Set1')
74
+ ax[0, 0].set_title("Booking Behavior by Hotel Type (Lead Time)")
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
+ sns.barplot(data=d, x='hotel', y='is_canceled', ax=ax[0, 1], palette='Set1')
77
+ ax[0, 1].set_title("Cancellation Rate by Hotel Type")
78
 
79
+ sns.countplot(data=d, x='booking_changes', hue='hotel', ax=ax[1, 0], palette='Set1')
80
+ ax[1, 0].set_title("Booking Changes by Hotel Type")
 
 
 
 
 
81
 
82
+ sns.countplot(data=d, x='hotel', ax=ax[1, 1], palette='Set1')
83
+ ax[1, 1].set_title("Total Bookings by Hotel Type")
84
+
85
+
86
+ plt.tight_layout()
87
+
88
+ st.pyplot(fig)
89
+
90
+ plt.figure(figsize=(12, 6))
91
+ sns.countplot(data=d, x='market_segment', palette='Set3')
92
+ plt.title('Distribution of Market Segmentation')
93
+ plt.xlabel('Market Segment')
94
+ plt.ylabel('Count')
95
+ plt.xticks(rotation=45, ha='right')
96
+ plt.tight_layout()
97
+
98
+ st.pyplot()
99
+
100
+ # create a count plot for distribution channels
101
+ plt.figure(figsize=(10, 6))
102
+ sns.countplot(data=d, x='distribution_channel', palette='Set2')
103
+ plt.title('Distribution of Distribution Channels')
104
+ plt.xlabel('Distribution Channel')
105
+ plt.ylabel('Count')
106
+ plt.tight_layout()
107
+
108
+ st.pyplot()
109
 
110
 
111
  if __name__ == '__main__':