ardifarizky committed on
Commit
b711091
1 Parent(s): 303fdbd

Update eda.py

Browse files
Files changed (1) hide show
  1. eda.py +70 -68
eda.py CHANGED
@@ -29,83 +29,85 @@ def run():
29
 
30
  d = pd.read_csv('hotel_bookings.csv')
31
 
32
- fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(15, 10))
33
-
34
- sns.histplot(data=d, x='lead_time', hue='is_canceled',
35
- kde=True, ax=ax[0][0], palette='Set1').set_title("distribution of Lead Time")
36
-
37
- sns.histplot(data=d, x='booking_changes', hue='is_canceled',
38
- ax=ax[0][1], palette='Set1').set_title("distribution of Booking Changes")
39
 
40
- sns.histplot(data=d, x='deposit_type', hue='is_canceled',
41
- ax=ax[1][0], palette='Set1').set_title("distribution of Deposit Type")
42
 
43
- plt.tight_layout()
44
- st.pyplot(fig)
45
-
46
- booking_counts = d.groupby(['arrival_date_year', 'arrival_date_month', 'arrival_date_week_number', 'hotel']).size().reset_index(name='booking_count')
47
-
48
- pivot_table = booking_counts.pivot_table(index=['arrival_date_month', 'arrival_date_week_number'], columns=['arrival_date_year', 'hotel'], values='booking_count', fill_value=0)
 
 
 
 
 
49
 
50
- plt.figure(figsize=(12, 10))
51
- pivot_table.plot(kind='line')
52
- plt.title('Seasonal Booking Trends')
53
- plt.xlabel('Month and Week Number')
54
- plt.ylabel('Booking Count')
55
- plt.legend(title='Hotel Type')
56
- plt.xticks(rotation=45)
57
- plt.tight_layout()
58
- st.pyplot()
59
-
60
- demographics_counts = d[['babies', 'adults', 'children']].sum()
61
 
62
- # creating the pie chart
63
- plt.figure(figsize=(8, 8))
64
- plt.pie(demographics_counts, labels=demographics_counts.index, autopct='%1.1f%%', startangle=140)
65
- plt.title('Distribution of Guest Demographics')
66
- plt.axis('equal')
 
 
 
 
 
 
67
 
68
- st.pyplot()
69
-
70
 
71
- fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(12, 10))
72
-
73
- sns.histplot(data=d, x='lead_time', hue='hotel', multiple='stack', bins=20, ax=ax[0, 0], palette='Set1')
74
- ax[0, 0].set_title("Booking Behavior by Hotel Type (Lead Time)")
75
-
76
- sns.barplot(data=d, x='hotel', y='is_canceled', ax=ax[0, 1], palette='Set1')
77
- ax[0, 1].set_title("Cancellation Rate by Hotel Type")
78
-
79
- sns.countplot(data=d, x='booking_changes', hue='hotel', ax=ax[1, 0], palette='Set1')
80
- ax[1, 0].set_title("Booking Changes by Hotel Type")
81
-
82
- sns.countplot(data=d, x='hotel', ax=ax[1, 1], palette='Set1')
83
- ax[1, 1].set_title("Total Bookings by Hotel Type")
84
-
85
-
86
- plt.tight_layout()
87
-
88
- st.pyplot(fig)
89
-
90
- plt.figure(figsize=(12, 6))
91
- sns.countplot(data=d, x='market_segment', palette='Set3')
92
- plt.title('Distribution of Market Segmentation')
93
- plt.xlabel('Market Segment')
94
- plt.ylabel('Count')
95
- plt.xticks(rotation=45, ha='right')
96
- plt.tight_layout()
97
 
98
- st.pyplot()
 
99
 
100
- # create a count plot for distribution channels
101
- plt.figure(figsize=(10, 6))
102
- sns.countplot(data=d, x='distribution_channel', palette='Set2')
103
- plt.title('Distribution of Distribution Channels')
104
- plt.xlabel('Distribution Channel')
105
- plt.ylabel('Count')
106
- plt.tight_layout()
107
 
108
- st.pyplot()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
  with st.sidebar:
111
 
 
29
 
30
  d = pd.read_csv('hotel_bookings.csv')
31
 
32
+ with st.container():
 
 
 
 
 
 
33
 
34
+ fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(15, 10))
 
35
 
36
+ sns.histplot(data=d, x='lead_time', hue='is_canceled',
37
+ kde=True, ax=ax[0][0], palette='Set1').set_title("distribution of Lead Time")
38
+
39
+ sns.histplot(data=d, x='booking_changes', hue='is_canceled',
40
+ ax=ax[0][1], palette='Set1').set_title("distribution of Booking Changes")
41
+
42
+ sns.histplot(data=d, x='deposit_type', hue='is_canceled',
43
+ ax=ax[1][0], palette='Set1').set_title("distribution of Deposit Type")
44
+
45
+ plt.tight_layout()
46
+ st.pyplot(fig)
47
 
48
+ booking_counts = d.groupby(['arrival_date_year', 'arrival_date_month', 'arrival_date_week_number', 'hotel']).size().reset_index(name='booking_count')
 
 
 
 
 
 
 
 
 
 
49
 
50
+ pivot_table = booking_counts.pivot_table(index=['arrival_date_month', 'arrival_date_week_number'], columns=['arrival_date_year', 'hotel'], values='booking_count', fill_value=0)
51
+
52
+ plt.figure(figsize=(12, 10))
53
+ pivot_table.plot(kind='line')
54
+ plt.title('Seasonal Booking Trends')
55
+ plt.xlabel('Month and Week Number')
56
+ plt.ylabel('Booking Count')
57
+ plt.legend(title='Hotel Type')
58
+ plt.xticks(rotation=45)
59
+ plt.tight_layout()
60
+ st.pyplot()
61
 
62
+ demographics_counts = d[['babies', 'adults', 'children']].sum()
 
63
 
64
+ # creating the pie chart
65
+ plt.figure(figsize=(8, 8))
66
+ plt.pie(demographics_counts, labels=demographics_counts.index, autopct='%1.1f%%', startangle=140)
67
+ plt.title('Distribution of Guest Demographics')
68
+ plt.axis('equal')
69
+
70
+ st.pyplot()
71
+
72
+
73
+ fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(12, 10))
74
+
75
+ sns.histplot(data=d, x='lead_time', hue='hotel', multiple='stack', bins=20, ax=ax[0, 0], palette='Set1')
76
+ ax[0, 0].set_title("Booking Behavior by Hotel Type (Lead Time)")
 
 
 
 
 
 
 
 
 
 
 
 
 
77
 
78
+ sns.barplot(data=d, x='hotel', y='is_canceled', ax=ax[0, 1], palette='Set1')
79
+ ax[0, 1].set_title("Cancellation Rate by Hotel Type")
80
 
81
+ sns.countplot(data=d, x='booking_changes', hue='hotel', ax=ax[1, 0], palette='Set1')
82
+ ax[1, 0].set_title("Booking Changes by Hotel Type")
 
 
 
 
 
83
 
84
+ sns.countplot(data=d, x='hotel', ax=ax[1, 1], palette='Set1')
85
+ ax[1, 1].set_title("Total Bookings by Hotel Type")
86
+
87
+
88
+ plt.tight_layout()
89
+
90
+ st.pyplot(fig)
91
+
92
+ plt.figure(figsize=(12, 6))
93
+ sns.countplot(data=d, x='market_segment', palette='Set3')
94
+ plt.title('Distribution of Market Segmentation')
95
+ plt.xlabel('Market Segment')
96
+ plt.ylabel('Count')
97
+ plt.xticks(rotation=45, ha='right')
98
+ plt.tight_layout()
99
+
100
+ st.pyplot()
101
+
102
+ # create a count plot for distribution channels
103
+ plt.figure(figsize=(10, 6))
104
+ sns.countplot(data=d, x='distribution_channel', palette='Set2')
105
+ plt.title('Distribution of Distribution Channels')
106
+ plt.xlabel('Distribution Channel')
107
+ plt.ylabel('Count')
108
+ plt.tight_layout()
109
+
110
+ st.pyplot()
111
 
112
  with st.sidebar:
113