Spaces:

mayankraghav
/

project

Runtime error

App Files Files Community

mayankraghav committed on Jul 21

Commit

aea9fdb

•

1 Parent(s): 9bd2d1f

modified app file

Browse files

Files changed (1) hide show

app.py +31 -28

app.py CHANGED Viewed

@@ -9,6 +9,9 @@ from datetime import timedelta
 from pandas.tseries.offsets import MonthEnd
 from statsmodels.tsa.statespace.sarimax import SARIMAX
 from statsmodels.tsa.stattools import adfuller
 from sklearn.cluster import KMeans
 from sklearn.preprocessing import StandardScaler
@@ -24,29 +27,21 @@ if options == 'Overview':
     st.write('This app provides insights and predictions for hotel bookings.')
 elif options == 'Revenue Forecasting':
-    # Streamlit app title
-    st.title('Hotel Booking Revenue Forecasting with SARIMA')
-    # File uploader
     uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
     if uploaded_file is not None:
-        # Load the dataset
         data = pd.read_csv(uploaded_file)
-        # Display the first few rows of the dataset
         st.write("## Dataset Preview")
         st.write(data.head())
-        # Convert arrival_date_year and arrival_date_month to a datetime format
         data['arrival_date'] = pd.to_datetime(data['arrival_date_year'].astype(str) + '-' +
                                               data['arrival_date_month'].astype(str) + '-01')
         data['arrival_date'] += MonthEnd(0)
-        # Calculate monthly revenue
         monthly_revenue = data[data['is_canceled'] == 0].groupby('arrival_date')['adr'].sum().reset_index()
-        # Plot monthly revenue
         st.write("## Monthly Revenue")
         plt.figure(figsize=(12, 6))
         sns.lineplot(x='arrival_date', y='adr', data=monthly_revenue)
@@ -57,16 +52,13 @@ elif options == 'Revenue Forecasting':
         plt.tight_layout()
         st.pyplot(plt)
-        # Check for stationarity
         result = adfuller(monthly_revenue['adr'])
         st.write(f'## ADF Statistic: {result[0]}')
         st.write(f'## p-value: {result[1]}')
-        # If the series is not stationary, take the first difference
         if result[1] > 0.05:
             monthly_revenue['adr_diff'] = monthly_revenue['adr'].diff().dropna()
-        # Model parameters
         p = st.slider('AR order (p)', 0, 5, 1)
         d = st.slider('Differencing order (d)', 0, 2, 1)
         q = st.slider('MA order (q)', 0, 5, 1)
@@ -74,14 +66,12 @@ elif options == 'Revenue Forecasting':
         D = st.slider('Seasonal differencing order (D)', 0, 2, 1)
         Q = st.slider('Seasonal MA order (Q)', 0, 2, 1)
-        # Fit the SARIMA model with user-defined parameters
         model = SARIMAX(monthly_revenue['adr'],
                         order=(p, d, q),
                         seasonal_order=(P, D, Q, 12))
         model_fit = model.fit(disp=False)
-        # Make predictions
-        forecast_steps = 12  # Forecast for the next 12 months
         forecast = model_fit.get_forecast(steps=forecast_steps)
         forecast_index = pd.date_range(start=monthly_revenue['arrival_date'].max() + pd.DateOffset(months=1),
                                        periods=forecast_steps, freq='M')
@@ -89,7 +79,6 @@ elif options == 'Revenue Forecasting':
         forecast_df = pd.DataFrame({'arrival_date': forecast_index,
                                     'forecast': forecast.predicted_mean})
-        # Plot the results
         st.write("## Revenue Forecast")
         plt.figure(figsize=(12, 6))
         sns.lineplot(x='arrival_date', y='adr', data=monthly_revenue, label='Historical Revenue')
@@ -102,7 +91,6 @@ elif options == 'Revenue Forecasting':
         plt.tight_layout()
         st.pyplot(plt)
-        # Display forecasted values
         st.write("## Forecasted Revenue for the Next 12 Months")
         st.write(forecast_df.set_index('arrival_date'))
@@ -112,20 +100,35 @@ elif options == 'Predict Booking Cancellations':
     uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
     if uploaded_file is not None:
-        # Load the dataset
         data = pd.read_csv(uploaded_file)
         st.write("## Dataset Preview")
         st.write(data.head())
-        # Load the trained model
-        with open('random_forest_model.pkl', 'rb') as file:
-            model = pickle.load(file)
-        st.write("## Provide input data to predict if a booking will be canceled.")
         input_data = {}
-        for col in data.columns:
-            input_data[col] = st.text_input(f'{col}:', value='0')
         input_df = pd.DataFrame(input_data, index=[0])
         prediction = model.predict(input_df)
@@ -137,11 +140,11 @@ elif options == 'Market Segmentation':
     uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
     if uploaded_file is not None:
-        # Load the dataset
         data = pd.read_csv(uploaded_file)
         st.write("## Dataset Preview")
         st.write(data.head())
         segmentation_features = data[['total_guests', 'total_of_special_requests', 'lead_time', 'is_repeated_guest']]
         scaler = StandardScaler()
         segmentation_features_scaled = scaler.fit_transform(segmentation_features)
@@ -165,12 +168,12 @@ elif options == 'Customer Lifetime Value':
     uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
     if uploaded_file is not None:
-        # Load the dataset
         data = pd.read_csv(uploaded_file)
         st.write("## Dataset Preview")
         st.write(data.head())
-        clv_df = data.groupby('customer_id')['revenue'].sum().reset_index()
         clv_df.columns = ['customer_id', 'lifetime_value']
         st.write("## Customer Lifetime Value Distribution")

 from pandas.tseries.offsets import MonthEnd
 from statsmodels.tsa.statespace.sarimax import SARIMAX
 from statsmodels.tsa.stattools import adfuller
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import accuracy_score
 from sklearn.cluster import KMeans
 from sklearn.preprocessing import StandardScaler
     st.write('This app provides insights and predictions for hotel bookings.')
 elif options == 'Revenue Forecasting':
+    st.header('Hotel Booking Revenue Forecasting with SARIMA')
     uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
     if uploaded_file is not None:
         data = pd.read_csv(uploaded_file)
         st.write("## Dataset Preview")
         st.write(data.head())
         data['arrival_date'] = pd.to_datetime(data['arrival_date_year'].astype(str) + '-' +
                                               data['arrival_date_month'].astype(str) + '-01')
         data['arrival_date'] += MonthEnd(0)
         monthly_revenue = data[data['is_canceled'] == 0].groupby('arrival_date')['adr'].sum().reset_index()
         st.write("## Monthly Revenue")
         plt.figure(figsize=(12, 6))
         sns.lineplot(x='arrival_date', y='adr', data=monthly_revenue)
         plt.tight_layout()
         st.pyplot(plt)
         result = adfuller(monthly_revenue['adr'])
         st.write(f'## ADF Statistic: {result[0]}')
         st.write(f'## p-value: {result[1]}')
         if result[1] > 0.05:
             monthly_revenue['adr_diff'] = monthly_revenue['adr'].diff().dropna()
         p = st.slider('AR order (p)', 0, 5, 1)
         d = st.slider('Differencing order (d)', 0, 2, 1)
         q = st.slider('MA order (q)', 0, 5, 1)
         D = st.slider('Seasonal differencing order (D)', 0, 2, 1)
         Q = st.slider('Seasonal MA order (Q)', 0, 2, 1)
         model = SARIMAX(monthly_revenue['adr'],
                         order=(p, d, q),
                         seasonal_order=(P, D, Q, 12))
         model_fit = model.fit(disp=False)
+        forecast_steps = 12
         forecast = model_fit.get_forecast(steps=forecast_steps)
         forecast_index = pd.date_range(start=monthly_revenue['arrival_date'].max() + pd.DateOffset(months=1),
                                        periods=forecast_steps, freq='M')
         forecast_df = pd.DataFrame({'arrival_date': forecast_index,
                                     'forecast': forecast.predicted_mean})
         st.write("## Revenue Forecast")
         plt.figure(figsize=(12, 6))
         sns.lineplot(x='arrival_date', y='adr', data=monthly_revenue, label='Historical Revenue')
         plt.tight_layout()
         st.pyplot(plt)
         st.write("## Forecasted Revenue for the Next 12 Months")
         st.write(forecast_df.set_index('arrival_date'))
     uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
     if uploaded_file is not None:
         data = pd.read_csv(uploaded_file)
         st.write("## Dataset Preview")
         st.write(data.head())
+        features = ['lead_time', 'arrival_date_year', 'arrival_date_week_number',
+                    'arrival_date_day_of_month', 'stays_in_weekend_nights',
+                    'stays_in_week_nights', 'adults', 'children', 'babies',
+                    'is_repeated_guest', 'previous_cancellations',
+                    'previous_bookings_not_canceled', 'booking_changes',
+                    'days_in_waiting_list', 'adr', 'required_car_parking_spaces',
+                    'total_of_special_requests']
+        X = data[features]
+        y = data['is_canceled']
+        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+        model = RandomForestClassifier(n_estimators=100, random_state=42)
+        model.fit(X_train, y_train)
+        y_pred = model.predict(X_test)
+        accuracy = accuracy_score(y_test, y_pred)
+        st.write(f'Model Accuracy: {accuracy:.2f}')
+        st.write("## Predict Booking Cancellation for New Data")
         input_data = {}
+        for col in features:
+            input_data[col] = float(st.text_input(f'{col}:', value='0'))
         input_df = pd.DataFrame(input_data, index=[0])
         prediction = model.predict(input_df)
     uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
     if uploaded_file is not None:
         data = pd.read_csv(uploaded_file)
         st.write("## Dataset Preview")
         st.write(data.head())
+        data['total_guests'] = data['adults'] + data['children'] + data['babies']
         segmentation_features = data[['total_guests', 'total_of_special_requests', 'lead_time', 'is_repeated_guest']]
         scaler = StandardScaler()
         segmentation_features_scaled = scaler.fit_transform(segmentation_features)
     uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
     if uploaded_file is not None:
         data = pd.read_csv(uploaded_file)
         st.write("## Dataset Preview")
         st.write(data.head())
+        data['customer_id'] = data['lead_time'].astype(str) + '_' + data['arrival_date_year'].astype(str) + '_' + data['arrival_date_month'].astype(str) + '_' + data['arrival_date_day_of_month'].astype(str)
+        clv_df = data.groupby('customer_id')['adr'].sum().reset_index()
         clv_df.columns = ['customer_id', 'lifetime_value']
         st.write("## Customer Lifetime Value Distribution")