# Spaces: Runtime error (page-status banner captured with the source; not part of the program)
import streamlit as st | |
import pandas as pd | |
import seaborn as sns | |
import matplotlib.pyplot as plt | |
import plotly.express as px | |
from PIL import Image | |
# Page-level Streamlit settings: page title 'EDA', wide layout, sidebar
# expanded on first load.
# NOTE(review): Streamlit's documentation has required set_page_config to run
# before other st.* commands -- keep it ahead of the st.markdown call below.
st.set_page_config(
    page_title= 'EDA',
    layout='wide',
    initial_sidebar_state='expanded'
)
# Raw CSS that sets visibility: hidden on the elements matched by the
# #MainMenu and footer selectors.
hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
# unsafe_allow_html=True so the <style> tag is passed through as HTML.
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
def _render(fig):
    """Show a Matplotlib figure on the Streamlit page, then release it.

    The figure is passed to ``st.pyplot`` explicitly instead of relying on
    pyplot's implicit "current figure", and is closed afterwards so figures
    created through pyplot do not accumulate across calls.
    """
    st.pyplot(fig)
    plt.close(fig)


def _plot_cancellation_distributions(d):
    """Build histograms of three booking columns, split by ``is_canceled``.

    Plots ``lead_time`` (with a KDE overlay), ``booking_changes`` and
    ``deposit_type`` on a 2x2 grid and returns the figure.
    """
    fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(15, 10))
    sns.histplot(data=d, x='lead_time', hue='is_canceled',
                 kde=True, ax=ax[0][0], palette='Set1').set_title("distribution of Lead Time")
    sns.histplot(data=d, x='booking_changes', hue='is_canceled',
                 ax=ax[0][1], palette='Set1').set_title("distribution of Booking Changes")
    sns.histplot(data=d, x='deposit_type', hue='is_canceled',
                 ax=ax[1][0], palette='Set1').set_title("distribution of Deposit Type")
    # Only three of the four grid cells are used; hide the empty frame.
    ax[1][1].set_axis_off()
    fig.tight_layout()
    return fig


def _plot_seasonal_trends(d):
    """Build a line chart of booking counts per (month, week), one line per (year, hotel)."""
    booking_counts = d.groupby(
        ['arrival_date_year', 'arrival_date_month', 'arrival_date_week_number', 'hotel']
    ).size().reset_index(name='booking_count')
    pivot_table = booking_counts.pivot_table(
        index=['arrival_date_month', 'arrival_date_week_number'],
        columns=['arrival_date_year', 'hotel'],
        values='booking_count',
        fill_value=0,
    )
    # NOTE(review): the pivot index is ordered by its labels; if
    # arrival_date_month holds month names the x-axis will be alphabetical
    # rather than chronological -- confirm against the data and reorder if so.

    # Draw on an explicit Axes: without ax=, DataFrame.plot opens its own
    # figure and the figsize requested here would not apply to the chart.
    fig, ax = plt.subplots(figsize=(12, 10))
    pivot_table.plot(kind='line', ax=ax)
    ax.set_title('Seasonal Booking Trends')
    ax.set_xlabel('Month and Week Number')
    ax.set_ylabel('Booking Count')
    ax.legend(title='Hotel Type')
    plt.setp(ax.get_xticklabels(), rotation=45)
    fig.tight_layout()
    return fig


def _plot_guest_demographics(d):
    """Build a pie chart of the column totals of ``babies``, ``adults`` and ``children``."""
    demographics_counts = d[['babies', 'adults', 'children']].sum()
    fig, ax = plt.subplots(figsize=(8, 8))
    ax.pie(demographics_counts, labels=demographics_counts.index,
           autopct='%1.1f%%', startangle=140)
    ax.set_title('Distribution of Guest Demographics')
    ax.axis('equal')
    return fig


def _plot_hotel_comparison(d):
    """Build a 2x2 grid comparing hotels on lead time, cancellations, changes and volume."""
    fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(12, 10))
    sns.histplot(data=d, x='lead_time', hue='hotel', multiple='stack',
                 bins=20, ax=ax[0, 0], palette='Set1')
    ax[0, 0].set_title("Booking Behavior by Hotel Type (Lead Time)")
    sns.barplot(data=d, x='hotel', y='is_canceled', ax=ax[0, 1], palette='Set1')
    ax[0, 1].set_title("Cancellation Rate by Hotel Type")
    sns.countplot(data=d, x='booking_changes', hue='hotel', ax=ax[1, 0], palette='Set1')
    ax[1, 0].set_title("Booking Changes by Hotel Type")
    sns.countplot(data=d, x='hotel', ax=ax[1, 1], palette='Set1')
    ax[1, 1].set_title("Total Bookings by Hotel Type")
    fig.tight_layout()
    return fig


def _plot_category_counts(d, column, title, xlabel, palette, figsize, rotate_labels=False):
    """Build a count plot of ``column``.

    Args:
        d: DataFrame containing ``column``.
        column: Name of the column to count.
        title: Axes title.
        xlabel: X-axis label.
        palette: Seaborn palette name passed to ``countplot``.
        figsize: ``(width, height)`` passed to ``plt.subplots``.
        rotate_labels: When true, rotate x tick labels 45 degrees and
            right-align them.

    Returns:
        The Matplotlib figure holding the plot.
    """
    fig, ax = plt.subplots(figsize=figsize)
    sns.countplot(data=d, x=column, palette=palette, ax=ax)
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Count')
    if rotate_labels:
        plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
    fig.tight_layout()
    return fig


def run():
    """Render the EDA page.

    Reads ``hotel_bookings.csv`` from the working directory and renders, in
    order: cancellation-split histograms, seasonal booking trends, guest
    demographics, the hotel comparison grid, market segments and distribution
    channels.
    """
    st.title('EDA')
    d = pd.read_csv('hotel_bookings.csv')

    _render(_plot_cancellation_distributions(d))
    _render(_plot_seasonal_trends(d))
    _render(_plot_guest_demographics(d))
    _render(_plot_hotel_comparison(d))
    # This chart previously ended in plt.show(), which does not put anything
    # on the Streamlit page; it is now rendered like the others.
    _render(_plot_category_counts(
        d, 'market_segment', 'Distribution of Market Segmentation',
        'Market Segment', 'Set3', (12, 6), rotate_labels=True,
    ))
    _render(_plot_category_counts(
        d, 'distribution_channel', 'Distribution of Distribution Channels',
        'Distribution Channel', 'Set2', (10, 6),
    ))
# Render the page only when this file is executed as the main script,
# not when it is imported as a module.
if __name__ == '__main__':
    run()