Spaces:

mayankraghav
/

project

Runtime error

App Files Files Community

mayankraghav committed on Jul 20

Commit

b965b34

•

1 Parent(s): 8f57fb2

Add application file

Browse files

Files changed (1) hide show

app.py +159 -0

app.py ADDED Viewed

	@@ -0,0 +1,159 @@

+# app.py
+import streamlit as st
+import pandas as pd
+import numpy as np
+import pickle
+import matplotlib.pyplot as plt
+import seaborn as sns
+from datetime import timedelta
+from pandas.tseries.offsets import MonthEnd
+from statsmodels.tsa.statespace.sarimax import SARIMAX
+from statsmodels.tsa.stattools import adfuller
+# # Load models
+# with open('./revenue_forcast.pkl', 'rb') as file:
+#     arima_model = pickle.load(file)
+# # Load data
+# file_path = './Dataset/hotel_booking.csv'
+# df = pd.read_csv(file_path)
+# Preprocess data for Streamlit
+numerical_cols = df.select_dtypes(include=['float64', 'int64']).columns
+categorical_cols = df.select_dtypes(include=['object']).columns
+for col in categorical_cols:
+    df[col] = df[col].astype('category')
+# Streamlit app
+st.title('Hotel Booking Analysis')
+# Navigation
+st.sidebar.title('Navigation')
+options = st.sidebar.radio('Select a page:', ['Overview', 'Revenue Forecasting', 'Predict Booking Cancellations', 'Market Segmentation', 'Customer Lifetime Value'])
+if options == 'Overview':
+    st.header('Overview')
+    st.write('This app provides insights and predictions for hotel bookings.')
+elif options == 'Revenue Forecasting':
+    # Streamlit app title
+    st.title('Hotel Booking Revenue Forecasting with SARIMA')
+    # File uploader
+    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
+    if uploaded_file is not None:
+        # Load the dataset
+        data = pd.read_csv(uploaded_file)
+        # Display the first few rows of the dataset
+        st.write("## Dataset Preview")
+        st.write(data.head())
+        # Convert arrival_date_year and arrival_date_month to a datetime format
+        data['arrival_date'] = pd.to_datetime(data['arrival_date_year'].astype(str) + '-' +
+                                              data['arrival_date_month'].astype(str) + '-01')
+        data['arrival_date'] += MonthEnd(0)
+        # Calculate monthly revenue
+        monthly_revenue = data[data['is_canceled'] == 0].groupby('arrival_date')['adr'].sum().reset_index()
+        # Plot monthly revenue
+        st.write("## Monthly Revenue")
+        plt.figure(figsize=(12, 6))
+        sns.lineplot(x='arrival_date', y='adr', data=monthly_revenue)
+        plt.title('Monthly Revenue')
+        plt.xlabel('Month')
+        plt.ylabel('Revenue')
+        plt.xticks(rotation=45)
+        plt.tight_layout()
+        st.pyplot(plt)
+        # Check for stationarity
+        result = adfuller(monthly_revenue['adr'])
+        st.write(f'## ADF Statistic: {result[0]}')
+        st.write(f'## p-value: {result[1]}')
+        # If the series is not stationary, take the first difference
+        monthly_revenue['adr_diff'] = monthly_revenue['adr'].diff().dropna()
+        # Model parameters
+        p = st.slider('AR order (p)', 0, 5, 1)
+        d = st.slider('Differencing order (d)', 0, 2, 1)
+        q = st.slider('MA order (q)', 0, 5, 1)
+        P = st.slider('Seasonal AR order (P)', 0, 2, 1)
+        D = st.slider('Seasonal differencing order (D)', 0, 2, 1)
+        Q = st.slider('Seasonal MA order (Q)', 0, 2, 1)
+        # Fit the SARIMA model with user-defined parameters
+        model = SARIMAX(monthly_revenue['adr'],
+                        order=(p, d, q),
+                        seasonal_order=(P, D, Q, 12))
+        model_fit = model.fit(disp=False)
+        # Make predictions
+        forecast_steps = 12  # Forecast for the next 12 months
+        forecast = model_fit.get_forecast(steps=forecast_steps)
+        forecast_index = pd.date_range(start=monthly_revenue['arrival_date'].max(),
+                                       periods=forecast_steps, freq='M')
+        forecast_df = pd.DataFrame({'arrival_date': forecast_index,
+                                    'forecast': forecast.predicted_mean})
+        # Plot the results
+        st.write("## Revenue Forecast")
+        plt.figure(figsize=(12, 6))
+        sns.lineplot(x='arrival_date', y='adr', data=monthly_revenue, label='Historical Revenue')
+        sns.lineplot(x='arrival_date', y='forecast', data=forecast_df, label='Forecasted Revenue')
+        plt.title('Revenue Forecast')
+        plt.xlabel('Month')
+        plt.ylabel('Revenue')
+        plt.xticks(rotation=45)
+        plt.legend()
+        plt.tight_layout()
+        st.pyplot(plt)
+        # Display forecasted values
+        st.write("## Forecasted Revenue for the Next 12 Months")
+        st.write(forecast_df.set_index('arrival_date'))
+elif options == 'Predict Booking Cancellations':
+    st.header('Predict Booking Cancellations')
+    st.write('Provide input data to predict if a booking will be canceled.')
+    input_data = {}
+    for col in df.drop(columns=['is_canceled']).columns:
+        input_data[col] = st.text_input(f'{col}:', value='0')
+    input_df = pd.DataFrame(input_data, index=[0])
+    prediction = random_forest_model.predict(input_df)
+    st.write('Prediction:', 'Canceled' if prediction[0] else 'Not Canceled')
+elif options == 'Market Segmentation':
+    st.header('Market Segmentation')
+    segmentation_features = df[['total_guests', 'total_of_special_requests', 'lead_time', 'is_repeated_guest']]
+    scaler = StandardScaler()
+    segmentation_features_scaled = scaler.fit_transform(segmentation_features)
+    kmeans = KMeans(n_clusters=4, random_state=42)
+    df['customer_segment'] = kmeans.fit_predict(segmentation_features_scaled)
+    plt.figure(figsize=(10, 5))
+    sns.scatterplot(x=segmentation_features_scaled[:, 0], y=segmentation_features_scaled[:, 1], hue=df['customer_segment'], palette='viridis')
+    plt.title('Customer Segmentation')
+    plt.xlabel('Total Guests (Standardized)')
+    plt.ylabel('Total Special Requests (Standardized)')
+    st.pyplot(plt)
+elif options == 'Customer Lifetime Value':
+    st.header('Customer Lifetime Value')
+    clv_df = df.groupby('customer_id')['revenue'].sum().reset_index()
+    clv_df.columns = ['customer_id', 'lifetime_value']
+    plt.figure(figsize=(10, 5))
+    sns.histplot(clv_df['lifetime_value'], kde=True)
+    plt.title('Customer Lifetime Value Distribution')
+    plt.xlabel('Lifetime Value')
+    plt.ylabel('Frequency')
+    st.pyplot(plt)