Spaces:
Runtime error
Runtime error
mayankraghav
committed on
Commit
•
9bd2d1f
1
Parent(s):
0f3d4df
modified app file
Browse files
app.py
CHANGED
@@ -9,20 +9,8 @@ from datetime import timedelta
|
|
9 |
from pandas.tseries.offsets import MonthEnd
|
10 |
from statsmodels.tsa.statespace.sarimax import SARIMAX
|
11 |
from statsmodels.tsa.stattools import adfuller
|
12 |
-
|
13 |
-
|
14 |
-
# with open('./revenue_forcast.pkl', 'rb') as file:
|
15 |
-
# arima_model = pickle.load(file)
|
16 |
-
|
17 |
-
# # Load data
|
18 |
-
# file_path = './Dataset/hotel_booking.csv'
|
19 |
-
# df = pd.read_csv(file_path)
|
20 |
-
|
21 |
-
# # Preprocess data for Streamlit
|
22 |
-
# numerical_cols = df.select_dtypes(include=['float64', 'int64']).columns
|
23 |
-
# categorical_cols = df.select_dtypes(include=['object']).columns
|
24 |
-
# for col in categorical_cols:
|
25 |
-
# df[col] = df[col].astype('category')
|
26 |
|
27 |
# Streamlit app
|
28 |
st.title('Hotel Booking Analysis')
|
@@ -75,7 +63,8 @@ elif options == 'Revenue Forecasting':
|
|
75 |
st.write(f'## p-value: {result[1]}')
|
76 |
|
77 |
# If the series is not stationary, take the first difference
|
78 |
-
|
|
|
79 |
|
80 |
# Model parameters
|
81 |
p = st.slider('AR order (p)', 0, 5, 1)
|
@@ -94,7 +83,7 @@ elif options == 'Revenue Forecasting':
|
|
94 |
# Make predictions
|
95 |
forecast_steps = 12 # Forecast for the next 12 months
|
96 |
forecast = model_fit.get_forecast(steps=forecast_steps)
|
97 |
-
forecast_index = pd.date_range(start=monthly_revenue['arrival_date'].max(),
|
98 |
periods=forecast_steps, freq='M')
|
99 |
|
100 |
forecast_df = pd.DataFrame({'arrival_date': forecast_index,
|
@@ -117,43 +106,77 @@ elif options == 'Revenue Forecasting':
|
|
117 |
st.write("## Forecasted Revenue for the Next 12 Months")
|
118 |
st.write(forecast_df.set_index('arrival_date'))
|
119 |
|
120 |
-
|
121 |
elif options == 'Predict Booking Cancellations':
|
122 |
st.header('Predict Booking Cancellations')
|
123 |
-
st.write('Provide input data to predict if a booking will be canceled.')
|
124 |
|
125 |
-
|
126 |
-
|
127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
|
133 |
elif options == 'Market Segmentation':
|
134 |
st.header('Market Segmentation')
|
135 |
-
|
136 |
-
|
137 |
-
segmentation_features_scaled = scaler.fit_transform(segmentation_features)
|
138 |
-
|
139 |
-
kmeans = KMeans(n_clusters=4, random_state=42)
|
140 |
-
df['customer_segment'] = kmeans.fit_predict(segmentation_features_scaled)
|
141 |
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
|
149 |
elif options == 'Customer Lifetime Value':
|
150 |
st.header('Customer Lifetime Value')
|
151 |
-
|
152 |
-
|
153 |
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
from pandas.tseries.offsets import MonthEnd
|
10 |
from statsmodels.tsa.statespace.sarimax import SARIMAX
|
11 |
from statsmodels.tsa.stattools import adfuller
|
12 |
+
from sklearn.cluster import KMeans
|
13 |
+
from sklearn.preprocessing import StandardScaler
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
# Streamlit app
|
16 |
st.title('Hotel Booking Analysis')
|
|
|
63 |
st.write(f'## p-value: {result[1]}')
|
64 |
|
65 |
# If the series is not stationary, take the first difference
|
66 |
+
if result[1] > 0.05:
|
67 |
+
monthly_revenue['adr_diff'] = monthly_revenue['adr'].diff().dropna()
|
68 |
|
69 |
# Model parameters
|
70 |
p = st.slider('AR order (p)', 0, 5, 1)
|
|
|
83 |
# Make predictions
|
84 |
forecast_steps = 12 # Forecast for the next 12 months
|
85 |
forecast = model_fit.get_forecast(steps=forecast_steps)
|
86 |
+
forecast_index = pd.date_range(start=monthly_revenue['arrival_date'].max() + pd.DateOffset(months=1),
|
87 |
periods=forecast_steps, freq='M')
|
88 |
|
89 |
forecast_df = pd.DataFrame({'arrival_date': forecast_index,
|
|
|
106 |
st.write("## Forecasted Revenue for the Next 12 Months")
|
107 |
st.write(forecast_df.set_index('arrival_date'))
|
108 |
|
|
|
109 |
elif options == 'Predict Booking Cancellations':
|
110 |
st.header('Predict Booking Cancellations')
|
|
|
111 |
|
112 |
+
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
|
113 |
+
|
114 |
+
if uploaded_file is not None:
|
115 |
+
# Load the dataset
|
116 |
+
data = pd.read_csv(uploaded_file)
|
117 |
+
st.write("## Dataset Preview")
|
118 |
+
st.write(data.head())
|
119 |
+
|
120 |
+
# Load the trained model
|
121 |
+
with open('random_forest_model.pkl', 'rb') as file:
|
122 |
+
model = pickle.load(file)
|
123 |
+
|
124 |
+
st.write("## Provide input data to predict if a booking will be canceled.")
|
125 |
+
|
126 |
+
input_data = {}
|
127 |
+
for col in data.columns:
|
128 |
+
input_data[col] = st.text_input(f'{col}:', value='0')
|
129 |
|
130 |
+
input_df = pd.DataFrame(input_data, index=[0])
|
131 |
+
prediction = model.predict(input_df)
|
132 |
+
st.write('Prediction:', 'Canceled' if prediction[0] else 'Not Canceled')
|
133 |
|
134 |
elif options == 'Market Segmentation':
|
135 |
st.header('Market Segmentation')
|
136 |
+
|
137 |
+
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
|
|
|
|
|
|
|
|
|
138 |
|
139 |
+
if uploaded_file is not None:
|
140 |
+
# Load the dataset
|
141 |
+
data = pd.read_csv(uploaded_file)
|
142 |
+
st.write("## Dataset Preview")
|
143 |
+
st.write(data.head())
|
144 |
+
|
145 |
+
segmentation_features = data[['total_guests', 'total_of_special_requests', 'lead_time', 'is_repeated_guest']]
|
146 |
+
scaler = StandardScaler()
|
147 |
+
segmentation_features_scaled = scaler.fit_transform(segmentation_features)
|
148 |
+
|
149 |
+
kmeans = KMeans(n_clusters=4, random_state=42)
|
150 |
+
data['customer_segment'] = kmeans.fit_predict(segmentation_features_scaled)
|
151 |
+
|
152 |
+
st.write("## Customer Segmentation Results")
|
153 |
+
st.write(data[['customer_segment']].head())
|
154 |
+
|
155 |
+
plt.figure(figsize=(10, 5))
|
156 |
+
sns.scatterplot(x=segmentation_features_scaled[:, 0], y=segmentation_features_scaled[:, 1], hue=data['customer_segment'], palette='viridis')
|
157 |
+
plt.title('Customer Segmentation')
|
158 |
+
plt.xlabel('Total Guests (Standardized)')
|
159 |
+
plt.ylabel('Total Special Requests (Standardized)')
|
160 |
+
st.pyplot(plt)
|
161 |
|
162 |
elif options == 'Customer Lifetime Value':
|
163 |
st.header('Customer Lifetime Value')
|
164 |
+
|
165 |
+
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
|
166 |
|
167 |
+
if uploaded_file is not None:
|
168 |
+
# Load the dataset
|
169 |
+
data = pd.read_csv(uploaded_file)
|
170 |
+
st.write("## Dataset Preview")
|
171 |
+
st.write(data.head())
|
172 |
+
|
173 |
+
clv_df = data.groupby('customer_id')['revenue'].sum().reset_index()
|
174 |
+
clv_df.columns = ['customer_id', 'lifetime_value']
|
175 |
+
|
176 |
+
st.write("## Customer Lifetime Value Distribution")
|
177 |
+
plt.figure(figsize=(10, 5))
|
178 |
+
sns.histplot(clv_df['lifetime_value'], kde=True)
|
179 |
+
plt.title('Customer Lifetime Value Distribution')
|
180 |
+
plt.xlabel('Lifetime Value')
|
181 |
+
plt.ylabel('Frequency')
|
182 |
+
st.pyplot(plt)
|