# milestone2 / eda.py
# ardifarizky's picture
# Update eda.py
# 947469a
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from PIL import Image
# Page-level settings: browser tab title, wide layout, sidebar open on load.
# Streamlit has historically required set_page_config() to run before any
# other Streamlit command, so it stays first.
st.set_page_config(
    page_title='EDA',
    layout='wide',
    initial_sidebar_state='expanded',
)

# Best-effort: suppress the warning Streamlit shows when st.pyplot() is called
# without an explicit figure. Newer Streamlit releases removed this config
# key, and set_option() raises StreamlitAPIException for keys it does not
# recognise. The page must still load there, so the failure is deliberately
# ignored rather than allowed to abort the script.
try:
    st.set_option('deprecation.showPyplotGlobalUse', False)
except st.errors.StreamlitAPIException:
    pass

# CSS injected into the page to hide the elements matched by the
# `#MainMenu` and `footer` selectors.
hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
# Calendar position of each month label, used to put the seasonal chart in
# chronological order. Assumes 'arrival_date_month' holds full English month
# names -- TODO confirm against hotel_bookings.csv. Labels not listed here are
# placed after December instead of raising.
_MONTH_POSITION = {
    name: position
    for position, name in enumerate((
        'January', 'February', 'March', 'April', 'May', 'June',
        'July', 'August', 'September', 'October', 'November', 'December',
    ))
}

# Commentary shown next to the charts. Runtime text: keep byte-for-byte.
_EDA_NOTES = '''Bookings made well in advance, such as 250 days before the stay, often face cancellations. This suggests the need for flexible cancellation policies.
Despite "Non Refundable" deposits, a significant number of cancellations occur. Unforeseen events may be causing these cancellations.
April and May witness increased hotel bookings. This highlights the potential to optimize pricing and resources during these peak demand periods.
The difficulty in identifying strong connections between columns is due to the unequal data distribution between city and resort hotels. Caution is advised when interpreting findings.
Online Travel agents are favored for bookings. We can Strengthen partnerships with Online Travel agents, offering them exclusive deals or promotions to encourage more bookings through this channel. Focus marketing efforts on promoting these partnerships to attract a broader customer base. By implementing these solutions, hotels can adapt to changing customer preferences and market dynamics, ultimately enhancing customer satisfaction and revenue generation.'''


def _render(fig):
    """Draw *fig* in the current Streamlit container, then close it."""
    # Passing the figure explicitly avoids st.pyplot()'s deprecated
    # "global pyplot figure" mode.
    st.pyplot(fig)
    # Streamlit re-executes the script on every interaction; closing the
    # figure keeps pyplot from accumulating one open figure per chart per run.
    plt.close(fig)


def _cancellation_histograms(bookings):
    """Histograms of lead time, booking changes and deposit type, split by `is_canceled`."""
    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(15, 10))

    sns.histplot(data=bookings, x='lead_time', hue='is_canceled',
                 kde=True, ax=axes[0, 0], palette='Set1')
    axes[0, 0].set_title("distribution of Lead Time")

    sns.histplot(data=bookings, x='booking_changes', hue='is_canceled',
                 ax=axes[0, 1], palette='Set1')
    axes[0, 1].set_title("distribution of Booking Changes")

    sns.histplot(data=bookings, x='deposit_type', hue='is_canceled',
                 ax=axes[1, 0], palette='Set1')
    axes[1, 0].set_title("distribution of Deposit Type")

    # Only three of the four grid cells are used; hide the fourth so the page
    # does not show an empty axes frame.
    axes[1, 1].set_visible(False)

    fig.tight_layout()
    return fig


def _seasonal_trends(bookings):
    """Line chart of booking counts per (month, week), one line per (year, hotel)."""
    booking_counts = (
        bookings
        .groupby(['arrival_date_year', 'arrival_date_month',
                  'arrival_date_week_number', 'hotel'])
        .size()
        .reset_index(name='booking_count')
    )
    pivot_table = booking_counts.pivot_table(
        index=['arrival_date_month', 'arrival_date_week_number'],
        columns=['arrival_date_year', 'hotel'],
        values='booking_count',
        fill_value=0,
    )

    # pivot_table sorts the month labels as plain strings (April, August, ...),
    # which scrambles a chart meant to show seasonality. Reorder the rows by
    # calendar month, then by week number. Positional indexing is used so the
    # MultiIndex labels themselves are left untouched.
    months = pivot_table.index.get_level_values('arrival_date_month')
    weeks = pivot_table.index.get_level_values('arrival_date_week_number')
    sort_keys = [
        (_MONTH_POSITION.get(month, len(_MONTH_POSITION)), week)
        for month, week in zip(months, weeks)
    ]
    chronological = sorted(range(len(sort_keys)), key=sort_keys.__getitem__)
    pivot_table = pivot_table.iloc[chronological]

    # DataFrame.plot() opens its own default-sized figure unless it is handed
    # an Axes, so the sized figure must be created first and passed in.
    fig, ax = plt.subplots(figsize=(12, 10))
    pivot_table.plot(kind='line', ax=ax)
    ax.set_title('Seasonal Booking Trends')
    ax.set_xlabel('Month and Week Number')
    ax.set_ylabel('Booking Count')
    ax.legend(title='Hotel Type')
    ax.tick_params(axis='x', labelrotation=45)
    fig.tight_layout()
    return fig


def _guest_demographics(bookings):
    """Pie chart of the column totals for babies, adults and children."""
    demographics_counts = bookings[['babies', 'adults', 'children']].sum()

    fig, ax = plt.subplots(figsize=(8, 8))
    ax.pie(demographics_counts, labels=demographics_counts.index,
           autopct='%1.1f%%', startangle=140)
    ax.set_title('Distribution of Guest Demographics')
    ax.axis('equal')  # equal axis scaling so the pie is drawn as a circle
    return fig


def _hotel_comparison(bookings):
    """2x2 grid comparing the values of the `hotel` column on four measures."""
    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 10))

    sns.histplot(data=bookings, x='lead_time', hue='hotel', multiple='stack',
                 bins=20, ax=axes[0, 0], palette='Set1')
    axes[0, 0].set_title("Booking Behavior by Hotel Type (Lead Time)")

    sns.barplot(data=bookings, x='hotel', y='is_canceled',
                ax=axes[0, 1], palette='Set1')
    axes[0, 1].set_title("Cancellation Rate by Hotel Type")

    sns.countplot(data=bookings, x='booking_changes', hue='hotel',
                  ax=axes[1, 0], palette='Set1')
    axes[1, 0].set_title("Booking Changes by Hotel Type")

    sns.countplot(data=bookings, x='hotel', ax=axes[1, 1], palette='Set1')
    axes[1, 1].set_title("Total Bookings by Hotel Type")

    fig.tight_layout()
    return fig


def _count_plot(bookings, column, *, title, xlabel, palette, figsize,
                rotate_labels=False):
    """Count plot of one categorical column, with optional slanted x tick labels."""
    fig, ax = plt.subplots(figsize=figsize)
    sns.countplot(data=bookings, x=column, palette=palette, ax=ax)
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Count')
    if rotate_labels:
        plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
    fig.tight_layout()
    return fig


def run():
    """Render the EDA page: charts in the left column, commentary in the right.

    Reads 'hotel_bookings.csv' from the current working directory on every
    call. Each chart is built on its own figure, handed to Streamlit
    explicitly, and closed afterwards.
    """
    st.title('EDA')
    bookings = pd.read_csv('hotel_bookings.csv')
    col1, col2 = st.columns(2)

    with col1:
        _render(_cancellation_histograms(bookings))
        _render(_seasonal_trends(bookings))
        _render(_guest_demographics(bookings))
        _render(_hotel_comparison(bookings))
        _render(_count_plot(
            bookings, 'market_segment',
            title='Distribution of Market Segmentation',
            xlabel='Market Segment',
            palette='Set3',
            figsize=(12, 6),
            rotate_labels=True,
        ))
        _render(_count_plot(
            bookings, 'distribution_channel',
            title='Distribution of Distribution Channels',
            xlabel='Distribution Channel',
            palette='Set2',
            figsize=(10, 6),
        ))

    with col2:
        st.text_area('About Exploratory Data Analysis', _EDA_NOTES, height=1000)
# Render the page when this file is executed as a script rather than imported.
if __name__ == '__main__':
    run()