mayankraghav committed on
Commit
9bd2d1f
1 Parent(s): 0f3d4df

modified app file

Browse files
Files changed (1) hide show
  1. app.py +67 -44
app.py CHANGED
@@ -9,20 +9,8 @@ from datetime import timedelta
9
  from pandas.tseries.offsets import MonthEnd
10
  from statsmodels.tsa.statespace.sarimax import SARIMAX
11
  from statsmodels.tsa.stattools import adfuller
12
-
13
- # # Load models
14
- # with open('./revenue_forcast.pkl', 'rb') as file:
15
- # arima_model = pickle.load(file)
16
-
17
- # # Load data
18
- # file_path = './Dataset/hotel_booking.csv'
19
- # df = pd.read_csv(file_path)
20
-
21
- # # Preprocess data for Streamlit
22
- # numerical_cols = df.select_dtypes(include=['float64', 'int64']).columns
23
- # categorical_cols = df.select_dtypes(include=['object']).columns
24
- # for col in categorical_cols:
25
- # df[col] = df[col].astype('category')
26
 
27
  # Streamlit app
28
  st.title('Hotel Booking Analysis')
@@ -75,7 +63,8 @@ elif options == 'Revenue Forecasting':
75
  st.write(f'## p-value: {result[1]}')
76
 
77
  # If the series is not stationary, take the first difference
78
- monthly_revenue['adr_diff'] = monthly_revenue['adr'].diff().dropna()
 
79
 
80
  # Model parameters
81
  p = st.slider('AR order (p)', 0, 5, 1)
@@ -94,7 +83,7 @@ elif options == 'Revenue Forecasting':
94
  # Make predictions
95
  forecast_steps = 12 # Forecast for the next 12 months
96
  forecast = model_fit.get_forecast(steps=forecast_steps)
97
- forecast_index = pd.date_range(start=monthly_revenue['arrival_date'].max(),
98
  periods=forecast_steps, freq='M')
99
 
100
  forecast_df = pd.DataFrame({'arrival_date': forecast_index,
@@ -117,43 +106,77 @@ elif options == 'Revenue Forecasting':
117
  st.write("## Forecasted Revenue for the Next 12 Months")
118
  st.write(forecast_df.set_index('arrival_date'))
119
 
120
-
121
  elif options == 'Predict Booking Cancellations':
122
  st.header('Predict Booking Cancellations')
123
- st.write('Provide input data to predict if a booking will be canceled.')
124
 
125
- input_data = {}
126
- for col in df.drop(columns=['is_canceled']).columns:
127
- input_data[col] = st.text_input(f'{col}:', value='0')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
 
129
- input_df = pd.DataFrame(input_data, index=[0])
130
- prediction = random_forest_model.predict(input_df)
131
- st.write('Prediction:', 'Canceled' if prediction[0] else 'Not Canceled')
132
 
133
  elif options == 'Market Segmentation':
134
  st.header('Market Segmentation')
135
- segmentation_features = df[['total_guests', 'total_of_special_requests', 'lead_time', 'is_repeated_guest']]
136
- scaler = StandardScaler()
137
- segmentation_features_scaled = scaler.fit_transform(segmentation_features)
138
-
139
- kmeans = KMeans(n_clusters=4, random_state=42)
140
- df['customer_segment'] = kmeans.fit_predict(segmentation_features_scaled)
141
 
142
- plt.figure(figsize=(10, 5))
143
- sns.scatterplot(x=segmentation_features_scaled[:, 0], y=segmentation_features_scaled[:, 1], hue=df['customer_segment'], palette='viridis')
144
- plt.title('Customer Segmentation')
145
- plt.xlabel('Total Guests (Standardized)')
146
- plt.ylabel('Total Special Requests (Standardized)')
147
- st.pyplot(plt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
 
149
  elif options == 'Customer Lifetime Value':
150
  st.header('Customer Lifetime Value')
151
- clv_df = df.groupby('customer_id')['revenue'].sum().reset_index()
152
- clv_df.columns = ['customer_id', 'lifetime_value']
153
 
154
- plt.figure(figsize=(10, 5))
155
- sns.histplot(clv_df['lifetime_value'], kde=True)
156
- plt.title('Customer Lifetime Value Distribution')
157
- plt.xlabel('Lifetime Value')
158
- plt.ylabel('Frequency')
159
- st.pyplot(plt)
 
 
 
 
 
 
 
 
 
 
 
9
  from pandas.tseries.offsets import MonthEnd
10
  from statsmodels.tsa.statespace.sarimax import SARIMAX
11
  from statsmodels.tsa.stattools import adfuller
12
+ from sklearn.cluster import KMeans
13
+ from sklearn.preprocessing import StandardScaler
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  # Streamlit app
16
  st.title('Hotel Booking Analysis')
 
63
  st.write(f'## p-value: {result[1]}')
64
 
65
  # If the series is not stationary, take the first difference
66
+ if result[1] > 0.05:
67
+ monthly_revenue['adr_diff'] = monthly_revenue['adr'].diff().dropna()
68
 
69
  # Model parameters
70
  p = st.slider('AR order (p)', 0, 5, 1)
 
83
  # Make predictions
84
  forecast_steps = 12 # Forecast for the next 12 months
85
  forecast = model_fit.get_forecast(steps=forecast_steps)
86
+ forecast_index = pd.date_range(start=monthly_revenue['arrival_date'].max() + pd.DateOffset(months=1),
87
  periods=forecast_steps, freq='M')
88
 
89
  forecast_df = pd.DataFrame({'arrival_date': forecast_index,
 
106
  st.write("## Forecasted Revenue for the Next 12 Months")
107
  st.write(forecast_df.set_index('arrival_date'))
108
 
 
109
  elif options == 'Predict Booking Cancellations':
110
  st.header('Predict Booking Cancellations')
 
111
 
112
+ uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
113
+
114
+ if uploaded_file is not None:
115
+ # Load the dataset
116
+ data = pd.read_csv(uploaded_file)
117
+ st.write("## Dataset Preview")
118
+ st.write(data.head())
119
+
120
+ # Load the trained model
121
+ with open('random_forest_model.pkl', 'rb') as file:
122
+ model = pickle.load(file)
123
+
124
+ st.write("## Provide input data to predict if a booking will be canceled.")
125
+
126
+ input_data = {}
127
+ for col in data.columns:
128
+ input_data[col] = st.text_input(f'{col}:', value='0')
129
 
130
+ input_df = pd.DataFrame(input_data, index=[0])
131
+ prediction = model.predict(input_df)
132
+ st.write('Prediction:', 'Canceled' if prediction[0] else 'Not Canceled')
133
 
134
  elif options == 'Market Segmentation':
135
  st.header('Market Segmentation')
136
+
137
+ uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
 
 
 
 
138
 
139
+ if uploaded_file is not None:
140
+ # Load the dataset
141
+ data = pd.read_csv(uploaded_file)
142
+ st.write("## Dataset Preview")
143
+ st.write(data.head())
144
+
145
+ segmentation_features = data[['total_guests', 'total_of_special_requests', 'lead_time', 'is_repeated_guest']]
146
+ scaler = StandardScaler()
147
+ segmentation_features_scaled = scaler.fit_transform(segmentation_features)
148
+
149
+ kmeans = KMeans(n_clusters=4, random_state=42)
150
+ data['customer_segment'] = kmeans.fit_predict(segmentation_features_scaled)
151
+
152
+ st.write("## Customer Segmentation Results")
153
+ st.write(data[['customer_segment']].head())
154
+
155
+ plt.figure(figsize=(10, 5))
156
+ sns.scatterplot(x=segmentation_features_scaled[:, 0], y=segmentation_features_scaled[:, 1], hue=data['customer_segment'], palette='viridis')
157
+ plt.title('Customer Segmentation')
158
+ plt.xlabel('Total Guests (Standardized)')
159
+ plt.ylabel('Total Special Requests (Standardized)')
160
+ st.pyplot(plt)
161
 
162
  elif options == 'Customer Lifetime Value':
163
  st.header('Customer Lifetime Value')
164
+
165
+ uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
166
 
167
+ if uploaded_file is not None:
168
+ # Load the dataset
169
+ data = pd.read_csv(uploaded_file)
170
+ st.write("## Dataset Preview")
171
+ st.write(data.head())
172
+
173
+ clv_df = data.groupby('customer_id')['revenue'].sum().reset_index()
174
+ clv_df.columns = ['customer_id', 'lifetime_value']
175
+
176
+ st.write("## Customer Lifetime Value Distribution")
177
+ plt.figure(figsize=(10, 5))
178
+ sns.histplot(clv_df['lifetime_value'], kde=True)
179
+ plt.title('Customer Lifetime Value Distribution')
180
+ plt.xlabel('Lifetime Value')
181
+ plt.ylabel('Frequency')
182
+ st.pyplot(plt)