# app.py
# Spaces status captured alongside this listing: Runtime error
import pickle
from datetime import timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
from pandas.tseries.offsets import MonthEnd
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import adfuller
# Path of the bookings dataset used by every page except Revenue Forecasting,
# which works from a file the user uploads.
DATA_PATH = './Dataset/hotel_booking.csv'

# Number of months forecast past the last observed month.
FORECAST_STEPS = 12

# Columns each page reads. They are checked up front so that input lacking
# them produces a message in the app instead of a KeyError traceback.
FORECAST_COLUMNS = ['arrival_date_year', 'arrival_date_month', 'is_canceled', 'adr']
SEGMENTATION_COLUMNS = [
    'total_guests',
    'total_of_special_requests',
    'lead_time',
    'is_repeated_guest',
]
CLV_COLUMNS = ['customer_id', 'revenue']

# Load the dataset. A missing file leaves ``df`` as None so the pages that
# need it can report that, rather than the whole app failing at start-up.
try:
    df = pd.read_csv(DATA_PATH)
except FileNotFoundError:
    df = None

# Preprocess data for Streamlit: object columns become pandas categoricals.
if df is not None:
    numerical_cols = df.select_dtypes(include=['float64', 'int64']).columns
    categorical_cols = df.select_dtypes(include=['object']).columns
    for col in categorical_cols:
        df[col] = df[col].astype('category')

# NOTE(review): nothing in this file loads the cancellation classifier, so the
# name is bound to None here and the prediction page reports that the model is
# unavailable. Replace this with the real model load once its location is known.
random_forest_model = None


def _missing_columns(frame, required):
    """Return the names in *required* that are not columns of *frame*."""
    return [name for name in required if name not in frame.columns]


def _dataset_ready(required):
    """Return True if ``df`` is loaded and has every column in *required*.

    Otherwise a warning describing the problem is shown and False is returned.
    """
    if df is None:
        st.warning(f'Dataset not found at {DATA_PATH}; this page is unavailable.')
        return False
    missing = _missing_columns(df, required)
    if missing:
        st.warning(f'Dataset is missing required columns: {", ".join(missing)}')
        return False
    return True


def _show_figure(fig):
    """Render *fig* in the app, then close it.

    Streamlit re-runs the script on every interaction; closing the figure
    keeps pyplot from accumulating open figures across re-runs.
    """
    st.pyplot(fig)
    plt.close(fig)


def _render_overview():
    """Show the landing page."""
    st.header('Overview')
    st.write('This app provides insights and predictions for hotel bookings.')


def _render_revenue_forecasting():
    """Fit a SARIMA model to monthly revenue from an uploaded CSV and forecast ahead.

    Monthly revenue is the sum of ``adr`` over rows with ``is_canceled == 0``,
    grouped by the month-end of the arrival year/month. The model orders come
    from sliders; the seasonal period is fixed at 12.
    """
    st.title('Hotel Booking Revenue Forecasting with SARIMA')

    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
    if uploaded_file is None:
        return

    data = pd.read_csv(uploaded_file)
    st.write("## Dataset Preview")
    st.write(data.head())

    missing = _missing_columns(data, FORECAST_COLUMNS)
    if missing:
        st.error(f'Uploaded file is missing required columns: {", ".join(missing)}')
        return

    # Build a date from year + month, then move it to the end of that month.
    data['arrival_date'] = pd.to_datetime(
        data['arrival_date_year'].astype(str) + '-'
        + data['arrival_date_month'].astype(str) + '-01'
    )
    data['arrival_date'] += MonthEnd(0)

    not_canceled = data[data['is_canceled'] == 0]
    monthly_revenue = not_canceled.groupby('arrival_date')['adr'].sum().reset_index()
    if monthly_revenue.empty:
        st.warning('No non-canceled bookings found; nothing to forecast.')
        return

    st.write("## Monthly Revenue")
    fig, ax = plt.subplots(figsize=(12, 6))
    sns.lineplot(x='arrival_date', y='adr', data=monthly_revenue, ax=ax)
    ax.set_title('Monthly Revenue')
    ax.set_xlabel('Month')
    ax.set_ylabel('Revenue')
    ax.tick_params(axis='x', rotation=45)
    fig.tight_layout()
    _show_figure(fig)

    # Augmented Dickey-Fuller test, shown for reference only: the differencing
    # order actually used by the model comes from the sliders below.
    result = adfuller(monthly_revenue['adr'])
    st.write(f'## ADF Statistic: {result[0]}')
    st.write(f'## p-value: {result[1]}')

    p = st.slider('AR order (p)', 0, 5, 1)
    d = st.slider('Differencing order (d)', 0, 2, 1)
    q = st.slider('MA order (q)', 0, 5, 1)
    P = st.slider('Seasonal AR order (P)', 0, 2, 1)
    D = st.slider('Seasonal differencing order (D)', 0, 2, 1)
    Q = st.slider('Seasonal MA order (Q)', 0, 2, 1)

    # User-selected orders may not be estimable on the uploaded series; report
    # the failure in the app instead of letting the traceback end the run.
    try:
        model = SARIMAX(monthly_revenue['adr'],
                        order=(p, d, q),
                        seasonal_order=(P, D, Q, 12))
        model_fit = model.fit(disp=False)
    except (ValueError, np.linalg.LinAlgError) as exc:
        st.error(f'Could not fit SARIMA with the selected orders: {exc}')
        return

    forecast = model_fit.get_forecast(steps=FORECAST_STEPS)
    # The first forecast step is the month AFTER the last observation. Starting
    # the range at the last observed month-end would label every prediction one
    # month early and overlap the final historical point.
    forecast_index = pd.date_range(
        start=monthly_revenue['arrival_date'].max() + MonthEnd(1),
        periods=FORECAST_STEPS,
        freq=MonthEnd(),
    )
    forecast_df = pd.DataFrame({
        'arrival_date': forecast_index,
        # Plain array so the values pair with the dates by position.
        'forecast': np.asarray(forecast.predicted_mean),
    })

    st.write("## Revenue Forecast")
    fig, ax = plt.subplots(figsize=(12, 6))
    sns.lineplot(x='arrival_date', y='adr', data=monthly_revenue,
                 label='Historical Revenue', ax=ax)
    sns.lineplot(x='arrival_date', y='forecast', data=forecast_df,
                 label='Forecasted Revenue', ax=ax)
    ax.set_title('Revenue Forecast')
    ax.set_xlabel('Month')
    ax.set_ylabel('Revenue')
    ax.tick_params(axis='x', rotation=45)
    ax.legend()
    fig.tight_layout()
    _show_figure(fig)

    st.write(f"## Forecasted Revenue for the Next {FORECAST_STEPS} Months")
    st.write(forecast_df.set_index('arrival_date'))


def _render_cancellation_prediction():
    """Collect one value per feature column and show the model's prediction."""
    st.header('Predict Booking Cancellations')
    st.write('Provide input data to predict if a booking will be canceled.')

    if not _dataset_ready(['is_canceled']):
        return
    if random_forest_model is None:
        st.error('The cancellation model is not loaded, so predictions are unavailable.')
        return

    # NOTE(review): text_input returns strings, so the model receives a frame
    # of strings. Presumably it expects numeric/encoded features; confirm
    # against how the model was trained.
    input_data = {}
    for col in df.drop(columns=['is_canceled']).columns:
        input_data[col] = st.text_input(f'{col}:', value='0')
    input_df = pd.DataFrame(input_data, index=[0])

    prediction = random_forest_model.predict(input_df)
    st.write('Prediction:', 'Canceled' if prediction[0] else 'Not Canceled')


def _render_market_segmentation():
    """Cluster bookings into four segments and plot the first two scaled features.

    Adds a ``customer_segment`` column to ``df`` as a side effect.
    """
    st.header('Market Segmentation')
    if not _dataset_ready(SEGMENTATION_COLUMNS):
        return

    segmentation_features = df[SEGMENTATION_COLUMNS]
    # NOTE(review): StandardScaler and KMeans are not imported anywhere in this
    # file (presumably sklearn.preprocessing / sklearn.cluster; confirm and
    # add the imports). Until then this page raises NameError here.
    scaler = StandardScaler()
    segmentation_features_scaled = scaler.fit_transform(segmentation_features)
    kmeans = KMeans(n_clusters=4, random_state=42)
    df['customer_segment'] = kmeans.fit_predict(segmentation_features_scaled)

    fig, ax = plt.subplots(figsize=(10, 5))
    sns.scatterplot(x=segmentation_features_scaled[:, 0],
                    y=segmentation_features_scaled[:, 1],
                    hue=df['customer_segment'],
                    palette='viridis',
                    ax=ax)
    ax.set_title('Customer Segmentation')
    ax.set_xlabel('Total Guests (Standardized)')
    ax.set_ylabel('Total Special Requests (Standardized)')
    _show_figure(fig)


def _render_customer_lifetime_value():
    """Plot the distribution of total ``revenue`` per ``customer_id``."""
    st.header('Customer Lifetime Value')
    if not _dataset_ready(CLV_COLUMNS):
        return

    clv_df = df.groupby('customer_id')['revenue'].sum().reset_index()
    clv_df.columns = ['customer_id', 'lifetime_value']

    fig, ax = plt.subplots(figsize=(10, 5))
    sns.histplot(clv_df['lifetime_value'], kde=True, ax=ax)
    ax.set_title('Customer Lifetime Value Distribution')
    ax.set_xlabel('Lifetime Value')
    ax.set_ylabel('Frequency')
    _show_figure(fig)


# Sidebar label -> page renderer, in display order.
PAGES = {
    'Overview': _render_overview,
    'Revenue Forecasting': _render_revenue_forecasting,
    'Predict Booking Cancellations': _render_cancellation_prediction,
    'Market Segmentation': _render_market_segmentation,
    'Customer Lifetime Value': _render_customer_lifetime_value,
}

# Streamlit app
st.title('Hotel Booking Analysis')

# Navigation
st.sidebar.title('Navigation')
options = st.sidebar.radio('Select a page:', list(PAGES))
PAGES[options]()