Spaces:
Runtime error
Runtime error
File size: 5,141 Bytes
1c58500 44ff538 1c58500 c23995e 1c58500 15a269a 42fec1e 15a269a 22ca2d7 7204750 e421ede b711091 f7e19bd b711091 f7e19bd b711091 f7e19bd b711091 e421ede b711091 15a269a b711091 e421ede b711091 e421ede b711091 e421ede b711091 42fec1e 7204750 42fec1e cb6e1e6 bd2b07b 69b4839 bd2b07b 69b4839 bd2b07b 69b4839 bd2b07b 69b4839 bd2b07b 1c58500 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 |
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from PIL import Image
# Page-level configuration. set_page_config is called before any other
# Streamlit command in this script.
st.set_page_config(
    page_title='EDA',
    layout='wide',
    initial_sidebar_state='expanded',
)

# Silence the "global pyplot use" deprecation notice where the option still
# exists. Newer Streamlit releases no longer recognise this key and raise
# StreamlitAPIException for it, which would otherwise abort the whole app
# before anything is rendered.
try:
    st.set_option('deprecation.showPyplotGlobalUse', False)
except st.errors.StreamlitAPIException:
    pass

# CSS injected into the page to hide Streamlit's main menu and footer.
hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
def _render_and_close(fig):
    """Send *fig* to Streamlit, then close it.

    Passing the figure explicitly avoids the deprecated global-figure form
    of ``st.pyplot()``; closing it afterwards keeps matplotlib from piling
    up open figures every time the page is rendered.
    """
    st.pyplot(fig)
    plt.close(fig)


def run():
    """Render the EDA page.

    Reads ``hotel_bookings.csv`` from the working directory, draws the
    charts in the left column and the written findings in the right one.

    NOTE(review): the nesting under ``with col1`` / ``with col2`` was
    reconstructed (all charts left, text right) -- confirm against the
    intended layout.
    """
    st.title('EDA')
    d = pd.read_csv('hotel_bookings.csv')
    col1, col2 = st.columns(2)

    with col1:
        # --- Distributions split by cancellation status -----------------
        fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(15, 10))
        sns.histplot(data=d, x='lead_time', hue='is_canceled',
                     kde=True, ax=axes[0][0],
                     palette='Set1').set_title("distribution of Lead Time")
        sns.histplot(data=d, x='booking_changes', hue='is_canceled',
                     ax=axes[0][1],
                     palette='Set1').set_title("distribution of Booking Changes")
        sns.histplot(data=d, x='deposit_type', hue='is_canceled',
                     ax=axes[1][0],
                     palette='Set1').set_title("distribution of Deposit Type")
        # Only three of the four grid cells carry a plot; hide the spare
        # one instead of showing an empty frame.
        axes[1][1].axis('off')
        fig.tight_layout()
        _render_and_close(fig)

        # --- Seasonal booking trends -------------------------------------
        booking_counts = (
            d.groupby(['arrival_date_year', 'arrival_date_month',
                       'arrival_date_week_number', 'hotel'])
            .size()
            .reset_index(name='booking_count')
        )
        pivot_table = booking_counts.pivot_table(
            index=['arrival_date_month', 'arrival_date_week_number'],
            columns=['arrival_date_year', 'hotel'],
            values='booking_count',
            fill_value=0,
        )
        # Draw on an explicit axes: without ``ax=`` pandas opens its own
        # figure, so the requested figsize would be ignored and a blank
        # figure would be left behind.
        fig, ax = plt.subplots(figsize=(12, 10))
        pivot_table.plot(kind='line', ax=ax)
        ax.set_title('Seasonal Booking Trends')
        ax.set_xlabel('Month and Week Number')
        ax.set_ylabel('Booking Count')
        ax.legend(title='Hotel Type')
        plt.setp(ax.get_xticklabels(), rotation=45)
        fig.tight_layout()
        _render_and_close(fig)

        # --- Guest demographics pie chart --------------------------------
        demographics_counts = d[['babies', 'adults', 'children']].sum()
        fig, ax = plt.subplots(figsize=(8, 8))
        ax.pie(demographics_counts, labels=demographics_counts.index,
               autopct='%1.1f%%', startangle=140)
        ax.set_title('Distribution of Guest Demographics')
        ax.axis('equal')  # keep the pie circular
        _render_and_close(fig)

        # --- Comparison by hotel type -------------------------------------
        fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 10))
        sns.histplot(data=d, x='lead_time', hue='hotel', multiple='stack',
                     bins=20, ax=axes[0, 0], palette='Set1')
        axes[0, 0].set_title("Booking Behavior by Hotel Type (Lead Time)")
        sns.barplot(data=d, x='hotel', y='is_canceled', ax=axes[0, 1],
                    palette='Set1')
        axes[0, 1].set_title("Cancellation Rate by Hotel Type")
        sns.countplot(data=d, x='booking_changes', hue='hotel',
                      ax=axes[1, 0], palette='Set1')
        axes[1, 0].set_title("Booking Changes by Hotel Type")
        sns.countplot(data=d, x='hotel', ax=axes[1, 1], palette='Set1')
        axes[1, 1].set_title("Total Bookings by Hotel Type")
        fig.tight_layout()
        _render_and_close(fig)

        # --- Market segments ------------------------------------------------
        fig, ax = plt.subplots(figsize=(12, 6))
        sns.countplot(data=d, x='market_segment', palette='Set3', ax=ax)
        ax.set_title('Distribution of Market Segmentation')
        ax.set_xlabel('Market Segment')
        ax.set_ylabel('Count')
        plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
        fig.tight_layout()
        _render_and_close(fig)

        # --- Distribution channels -----------------------------------------
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.countplot(data=d, x='distribution_channel', palette='Set2', ax=ax)
        ax.set_title('Distribution of Distribution Channels')
        ax.set_xlabel('Distribution Channel')
        ax.set_ylabel('Count')
        fig.tight_layout()
        _render_and_close(fig)

    with col2:
        # Written findings shown beside the charts.
        st.text_area(
            'About Exploratory Data Analysis',
            '''- Bookings made well in advance, such as 250 days before the stay, often face cancellations. This suggests the need for flexible cancellation policies.
- Despite "Non Refundable" deposits, a significant number of cancellations occur. Unforeseen events may be causing these cancellations.
- April and May witness increased hotel bookings. This highlights the potential to optimize pricing and resources during these peak demand periods.
- The difficulty in identifying strong connections between columns is due to the unequal data distribution between city and resort hotels. Caution is advised when interpreting findings.
- Online Travel agents are favored for bookings. We can Strengthen partnerships with Online Travel agents, offering them exclusive deals or promotions to encourage more bookings through this channel. Focus marketing efforts on promoting these partnerships to attract a broader customer base. By implementing these solutions, hotels can adapt to changing customer preferences and market dynamics, ultimately enhancing customer satisfaction and revenue generation.''',
            height=1000,
        )
# Render the page when this file is executed as a script.
if __name__ == '__main__':
    run()