mayankraghav committed on
Commit
b965b34
1 Parent(s): 8f57fb2

Add application file

Browse files
Files changed (1) hide show
  1. app.py +159 -0
app.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# app.py
import pickle
from datetime import timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
from pandas.tseries.offsets import MonthEnd
from scipy.cluster.vq import kmeans2
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import adfuller
# Path of the bookings dataset used by the dataset-backed pages.
DATASET_PATH = './Dataset/hotel_booking.csv'

# Sidebar entries, in display order.
PAGES = ['Overview', 'Revenue Forecasting', 'Predict Booking Cancellations',
         'Market Segmentation', 'Customer Lifetime Value']

# Columns each page reads; checked up front so a missing column produces a
# readable message instead of a KeyError traceback.
REVENUE_COLUMNS = ['arrival_date_year', 'arrival_date_month', 'is_canceled', 'adr']
SEGMENTATION_COLUMNS = ['total_guests', 'total_of_special_requests',
                        'lead_time', 'is_repeated_guest']
CLV_COLUMNS = ['customer_id', 'revenue']

SEASONAL_PERIOD = 12   # seasonal lag passed to SARIMAX
FORECAST_STEPS = 12    # number of future months to forecast
N_SEGMENTS = 4         # number of customer clusters


def load_dataset(path=DATASET_PATH):
    """Read the bookings CSV and convert its object columns to categoricals.

    Returns ``None`` when the file does not exist, so the pages that do not
    need the dataset keep working and the others can show a warning.
    """
    try:
        frame = pd.read_csv(path)
    except FileNotFoundError:
        return None
    for col in frame.select_dtypes(include=['object']).columns:
        frame[col] = frame[col].astype('category')
    return frame


def missing_columns(frame, required):
    """Return the names in *required* that are not columns of *frame*."""
    return [col for col in required if col not in frame.columns]


def compute_monthly_revenue(data):
    """Sum ``adr`` of non-cancelled bookings per arrival month.

    The arrival month is built from ``arrival_date_year`` and
    ``arrival_date_month`` and snapped to the month's last day. *data* itself
    is left unmodified. Returns a frame with columns ``arrival_date`` and
    ``adr``, one row per month.
    """
    arrival = pd.to_datetime(data['arrival_date_year'].astype(str) + '-' +
                             data['arrival_date_month'].astype(str) + '-01')
    arrival = arrival + MonthEnd(0)
    kept = data.assign(arrival_date=arrival)
    kept = kept[kept['is_canceled'] == 0]
    return kept.groupby('arrival_date')['adr'].sum().reset_index()


def build_forecast_index(last_date, steps):
    """Return *steps* month-end dates starting the month after *last_date*.

    Starting one month later keeps the forecast from overlapping the last
    historical observation. An offset object is passed as ``freq`` rather
    than the ``'M'`` string alias.
    """
    return pd.date_range(start=last_date + MonthEnd(1), periods=steps,
                         freq=MonthEnd())


def standardize(values):
    """Scale each column of a 2-D array to zero mean and unit variance.

    Columns with zero spread are only centred, which avoids dividing by zero.
    """
    values = np.asarray(values, dtype=float)
    spread = values.std(axis=0)
    spread[spread == 0] = 1.0
    return (values - values.mean(axis=0)) / spread


def _show(fig):
    """Render *fig* in the app, then close it so reruns do not pile up figures."""
    st.pyplot(fig)
    plt.close(fig)


def _require_dataset(df):
    """Warn and return False when the dataset could not be loaded."""
    if df is None:
        st.warning(f'Dataset not found at {DATASET_PATH}; this page needs it.')
        return False
    return True


def _require_columns(frame, required):
    """Show an error and return False when *frame* lacks any required column."""
    absent = missing_columns(frame, required)
    if absent:
        st.error('Missing required column(s): ' + ', '.join(absent))
        return False
    return True


def render_overview_page():
    """Show the landing page."""
    st.header('Overview')
    st.write('This app provides insights and predictions for hotel bookings.')


def render_revenue_page():
    """Fit a SARIMA model to an uploaded bookings CSV and plot a 12-month forecast."""
    st.title('Hotel Booking Revenue Forecasting with SARIMA')

    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
    if uploaded_file is None:
        return

    data = pd.read_csv(uploaded_file)
    st.write("## Dataset Preview")
    st.write(data.head())

    if not _require_columns(data, REVENUE_COLUMNS):
        return
    monthly_revenue = compute_monthly_revenue(data)

    st.write("## Monthly Revenue")
    fig, ax = plt.subplots(figsize=(12, 6))
    sns.lineplot(x='arrival_date', y='adr', data=monthly_revenue, ax=ax)
    ax.set_title('Monthly Revenue')
    ax.set_xlabel('Month')
    ax.set_ylabel('Revenue')
    ax.tick_params(axis='x', rotation=45)
    fig.tight_layout()
    _show(fig)

    # Stationarity check; adfuller rejects very short or constant series.
    try:
        result = adfuller(monthly_revenue['adr'])
    except ValueError as err:
        st.warning(f'Stationarity test could not be run: {err}')
    else:
        st.write(f'## ADF Statistic: {result[0]}')
        st.write(f'## p-value: {result[1]}')

    # Model parameters
    p = st.slider('AR order (p)', 0, 5, 1)
    d = st.slider('Differencing order (d)', 0, 2, 1)
    q = st.slider('MA order (q)', 0, 5, 1)
    P = st.slider('Seasonal AR order (P)', 0, 2, 1)
    D = st.slider('Seasonal differencing order (D)', 0, 2, 1)
    Q = st.slider('Seasonal MA order (Q)', 0, 2, 1)

    # Fitting can fail for short histories or unlucky order choices; report
    # that in the page instead of crashing the whole app.
    try:
        model = SARIMAX(monthly_revenue['adr'],
                        order=(p, d, q),
                        seasonal_order=(P, D, Q, SEASONAL_PERIOD))
        model_fit = model.fit(disp=False)
    except (ValueError, np.linalg.LinAlgError) as err:
        st.error(f'The SARIMA model could not be fitted: {err}')
        return

    forecast = model_fit.get_forecast(steps=FORECAST_STEPS)
    forecast_index = build_forecast_index(monthly_revenue['arrival_date'].max(),
                                          FORECAST_STEPS)
    # predicted_mean carries its own index; take the raw values so they pair
    # positionally with the forecast dates.
    forecast_df = pd.DataFrame({'arrival_date': forecast_index,
                                'forecast': np.asarray(forecast.predicted_mean)})

    st.write("## Revenue Forecast")
    fig, ax = plt.subplots(figsize=(12, 6))
    sns.lineplot(x='arrival_date', y='adr', data=monthly_revenue,
                 label='Historical Revenue', ax=ax)
    sns.lineplot(x='arrival_date', y='forecast', data=forecast_df,
                 label='Forecasted Revenue', ax=ax)
    ax.set_title('Revenue Forecast')
    ax.set_xlabel('Month')
    ax.set_ylabel('Revenue')
    ax.tick_params(axis='x', rotation=45)
    ax.legend()
    fig.tight_layout()
    _show(fig)

    st.write("## Forecasted Revenue for the Next 12 Months")
    st.write(forecast_df.set_index('arrival_date'))


def render_cancellation_page(df, model=None):
    """Collect one booking's fields and show *model*'s cancellation prediction.

    *model* must expose ``predict``; when it is ``None`` the inputs are still
    shown but a warning replaces the prediction.
    """
    st.header('Predict Booking Cancellations')
    st.write('Provide input data to predict if a booking will be canceled.')
    if not _require_dataset(df):
        return

    feature_cols = [col for col in df.columns if col != 'is_canceled']
    input_data = {col: st.text_input(f'{col}:', value='0') for col in feature_cols}

    if model is None:
        st.warning('No cancellation model is loaded, so no prediction can be made.')
        return

    # NOTE(review): every input arrives as a string; the model presumably
    # expects the encoding used at training time -- confirm before relying on it.
    input_df = pd.DataFrame(input_data, index=[0])
    prediction = model.predict(input_df)
    st.write('Prediction:', 'Canceled' if prediction[0] else 'Not Canceled')


def render_segmentation_page(df):
    """Cluster customers into ``N_SEGMENTS`` groups and plot the first two features."""
    st.header('Market Segmentation')
    if not _require_dataset(df) or not _require_columns(df, SEGMENTATION_COLUMNS):
        return

    # k-means cannot handle missing values, so incomplete rows are left out.
    segmentation_features = df[SEGMENTATION_COLUMNS].dropna()
    if len(segmentation_features) < N_SEGMENTS:
        st.warning('Not enough complete rows to form the customer segments.')
        return

    scaled = standardize(segmentation_features.to_numpy(dtype=float))
    # Fixed seed keeps the segment assignment stable across reruns.
    _, labels = kmeans2(scaled, N_SEGMENTS, minit='++', seed=42)
    df.loc[segmentation_features.index, 'customer_segment'] = labels

    fig, ax = plt.subplots(figsize=(10, 5))
    sns.scatterplot(x=scaled[:, 0], y=scaled[:, 1], hue=labels,
                    palette='viridis', ax=ax)
    ax.set_title('Customer Segmentation')
    ax.set_xlabel('Total Guests (Standardized)')
    ax.set_ylabel('Total Special Requests (Standardized)')
    _show(fig)


def render_clv_page(df):
    """Plot the distribution of summed ``revenue`` per ``customer_id``."""
    st.header('Customer Lifetime Value')
    if not _require_dataset(df) or not _require_columns(df, CLV_COLUMNS):
        return

    clv_df = df.groupby('customer_id')['revenue'].sum().reset_index()
    clv_df.columns = ['customer_id', 'lifetime_value']

    fig, ax = plt.subplots(figsize=(10, 5))
    sns.histplot(clv_df['lifetime_value'], kde=True, ax=ax)
    ax.set_title('Customer Lifetime Value Distribution')
    ax.set_xlabel('Lifetime Value')
    ax.set_ylabel('Frequency')
    _show(fig)


def main():
    """Draw the title and sidebar navigation, then render the selected page."""
    st.title('Hotel Booking Analysis')

    st.sidebar.title('Navigation')
    options = st.sidebar.radio('Select a page:', PAGES)

    df = load_dataset()
    # NOTE(review): the original page called `random_forest_model.predict`,
    # but no such object is created anywhere in this script. Load the trained
    # classifier here once its location is known.
    random_forest_model = None

    if options == 'Overview':
        render_overview_page()
    elif options == 'Revenue Forecasting':
        render_revenue_page()
    elif options == 'Predict Booking Cancellations':
        render_cancellation_page(df, model=random_forest_model)
    elif options == 'Market Segmentation':
        render_segmentation_page(df)
    elif options == 'Customer Lifetime Value':
        render_clv_page(df)


# `streamlit run` executes the script as `__main__`, so the app still starts
# normally while the helpers above stay importable without side effects.
if __name__ == "__main__":
    main()