import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from PIL import Image

# Page configuration is issued before any other Streamlit call in this script.
st.set_page_config(
    page_title='EDA',
    layout='wide',
    initial_sidebar_state='expanded'
)

hide_streamlit_style = """ """
st.markdown(hide_streamlit_style, unsafe_allow_html=True)


def _render(fig: plt.Figure) -> None:
    """Send *fig* to the Streamlit page, then close it.

    The figure is passed explicitly instead of relying on pyplot's global
    "current figure", and closed afterwards so figures do not pile up in
    pyplot's registry every time the script is rerun.
    """
    st.pyplot(fig)
    plt.close(fig)


def _plot_cancellation_distributions(d: pd.DataFrame) -> plt.Figure:
    """Build histograms of three booking columns, split by `is_canceled`."""
    fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(15, 10))
    sns.histplot(data=d, x='lead_time', hue='is_canceled', kde=True,
                 ax=ax[0][0], palette='Set1').set_title("distribution of Lead Time")
    sns.histplot(data=d, x='booking_changes', hue='is_canceled',
                 ax=ax[0][1], palette='Set1').set_title("distribution of Booking Changes")
    sns.histplot(data=d, x='deposit_type', hue='is_canceled',
                 ax=ax[1][0], palette='Set1').set_title("distribution of Deposit Type")
    # Only three of the four grid cells are used; hide the empty frame.
    ax[1][1].set_visible(False)
    fig.tight_layout()
    return fig


def _plot_seasonal_trends(d: pd.DataFrame) -> plt.Figure:
    """Build a line chart of booking counts per month/week, by year and hotel."""
    booking_counts = (
        d.groupby(['arrival_date_year', 'arrival_date_month',
                   'arrival_date_week_number', 'hotel'])
        .size()
        .reset_index(name='booking_count')
    )
    # NOTE(review): if arrival_date_month holds month names, the pivot index
    # sorts them alphabetically rather than chronologically -- confirm and
    # reorder if a calendar-ordered x axis is wanted.
    pivot_table = booking_counts.pivot_table(
        index=['arrival_date_month', 'arrival_date_week_number'],
        columns=['arrival_date_year', 'hotel'],
        values='booking_count',
        fill_value=0,
    )
    fig, ax = plt.subplots(figsize=(12, 10))
    # Pass the axes explicitly: DataFrame.plot without ax= draws on a figure of
    # its own, which left the figure sized above empty and unused.
    pivot_table.plot(kind='line', ax=ax)
    ax.set_title('Seasonal Booking Trends')
    ax.set_xlabel('Month and Week Number')
    ax.set_ylabel('Booking Count')
    ax.legend(title='Hotel Type')
    ax.tick_params(axis='x', labelrotation=45)
    fig.tight_layout()
    return fig


def _plot_guest_demographics(d: pd.DataFrame) -> plt.Figure:
    """Build a pie chart of the column totals for babies, adults and children."""
    demographics_counts = d[['babies', 'adults', 'children']].sum()
    fig, ax = plt.subplots(figsize=(8, 8))
    ax.pie(demographics_counts, labels=demographics_counts.index,
           autopct='%1.1f%%', startangle=140)
    ax.set_title('Distribution of Guest Demographics')
    ax.axis('equal')  # keep the pie circular
    return fig


def _plot_hotel_comparison(d: pd.DataFrame) -> plt.Figure:
    """Build a 2x2 grid of charts that break the data down by `hotel`."""
    fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(12, 10))

    sns.histplot(data=d, x='lead_time', hue='hotel', multiple='stack', bins=20,
                 ax=ax[0, 0], palette='Set1')
    ax[0, 0].set_title("Booking Behavior by Hotel Type (Lead Time)")

    sns.barplot(data=d, x='hotel', y='is_canceled', ax=ax[0, 1], palette='Set1')
    ax[0, 1].set_title("Cancellation Rate by Hotel Type")

    sns.countplot(data=d, x='booking_changes', hue='hotel', ax=ax[1, 0], palette='Set1')
    ax[1, 0].set_title("Booking Changes by Hotel Type")

    sns.countplot(data=d, x='hotel', ax=ax[1, 1], palette='Set1')
    ax[1, 1].set_title("Total Bookings by Hotel Type")

    fig.tight_layout()
    return fig


def _plot_market_segments(d: pd.DataFrame) -> plt.Figure:
    """Build a count plot of the `market_segment` column."""
    fig, ax = plt.subplots(figsize=(12, 6))
    sns.countplot(data=d, x='market_segment', palette='Set3', ax=ax)
    ax.set_title('Distribution of Market Segmentation')
    ax.set_xlabel('Market Segment')
    ax.set_ylabel('Count')
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
    fig.tight_layout()
    return fig


def _plot_distribution_channels(d: pd.DataFrame) -> plt.Figure:
    """Build a count plot of the `distribution_channel` column."""
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.countplot(data=d, x='distribution_channel', palette='Set2', ax=ax)
    ax.set_title('Distribution of Distribution Channels')
    ax.set_xlabel('Distribution Channel')
    ax.set_ylabel('Count')
    fig.tight_layout()
    return fig


def run():
    """Render the EDA page: a title followed by six charts of the bookings CSV.

    Reads ``hotel_bookings.csv`` from the working directory and renders each
    chart in a fixed order. Every figure is handed to Streamlit explicitly and
    closed afterwards (see ``_render``).
    """
    st.title('EDA')

    d = pd.read_csv('hotel_bookings.csv')

    chart_builders = (
        _plot_cancellation_distributions,
        _plot_seasonal_trends,
        _plot_guest_demographics,
        _plot_hotel_comparison,
        # Previously finished with plt.show(), so this chart never reached the page.
        _plot_market_segments,
        _plot_distribution_channels,
    )
    for build_chart in chart_builders:
        _render(build_chart(d))


if __name__ == '__main__':
    run()