Spaces:

mayankraghav
/

project

Runtime error

App Files Files Community

mayankraghav committed on Jul 21

Commit

9bd2d1f

•

1 Parent(s): 0f3d4df

modified app file

Browse files

Files changed (1) hide show

app.py +67 -44

app.py CHANGED Viewed

@@ -9,20 +9,8 @@ from datetime import timedelta
 from pandas.tseries.offsets import MonthEnd
 from statsmodels.tsa.statespace.sarimax import SARIMAX
 from statsmodels.tsa.stattools import adfuller
-# # Load models
-# with open('./revenue_forcast.pkl', 'rb') as file:
-#     arima_model = pickle.load(file)
-# # Load data
-# file_path = './Dataset/hotel_booking.csv'
-# df = pd.read_csv(file_path)
-# # Preprocess data for Streamlit
-# numerical_cols = df.select_dtypes(include=['float64', 'int64']).columns
-# categorical_cols = df.select_dtypes(include=['object']).columns
-# for col in categorical_cols:
-#     df[col] = df[col].astype('category')
 # Streamlit app
 st.title('Hotel Booking Analysis')
@@ -75,7 +63,8 @@ elif options == 'Revenue Forecasting':
         st.write(f'## p-value: {result[1]}')
         # If the series is not stationary, take the first difference
-        monthly_revenue['adr_diff'] = monthly_revenue['adr'].diff().dropna()
         # Model parameters
         p = st.slider('AR order (p)', 0, 5, 1)
@@ -94,7 +83,7 @@ elif options == 'Revenue Forecasting':
         # Make predictions
         forecast_steps = 12  # Forecast for the next 12 months
         forecast = model_fit.get_forecast(steps=forecast_steps)
-        forecast_index = pd.date_range(start=monthly_revenue['arrival_date'].max(),
                                        periods=forecast_steps, freq='M')
         forecast_df = pd.DataFrame({'arrival_date': forecast_index,
@@ -117,43 +106,77 @@ elif options == 'Revenue Forecasting':
         st.write("## Forecasted Revenue for the Next 12 Months")
         st.write(forecast_df.set_index('arrival_date'))
 elif options == 'Predict Booking Cancellations':
     st.header('Predict Booking Cancellations')
-    st.write('Provide input data to predict if a booking will be canceled.')
-    input_data = {}
-    for col in df.drop(columns=['is_canceled']).columns:
-        input_data[col] = st.text_input(f'{col}:', value='0')
-    input_df = pd.DataFrame(input_data, index=[0])
-    prediction = random_forest_model.predict(input_df)
-    st.write('Prediction:', 'Canceled' if prediction[0] else 'Not Canceled')
 elif options == 'Market Segmentation':
     st.header('Market Segmentation')
-    segmentation_features = df[['total_guests', 'total_of_special_requests', 'lead_time', 'is_repeated_guest']]
-    scaler = StandardScaler()
-    segmentation_features_scaled = scaler.fit_transform(segmentation_features)
-    kmeans = KMeans(n_clusters=4, random_state=42)
-    df['customer_segment'] = kmeans.fit_predict(segmentation_features_scaled)
-    plt.figure(figsize=(10, 5))
-    sns.scatterplot(x=segmentation_features_scaled[:, 0], y=segmentation_features_scaled[:, 1], hue=df['customer_segment'], palette='viridis')
-    plt.title('Customer Segmentation')
-    plt.xlabel('Total Guests (Standardized)')
-    plt.ylabel('Total Special Requests (Standardized)')
-    st.pyplot(plt)
 elif options == 'Customer Lifetime Value':
     st.header('Customer Lifetime Value')
-    clv_df = df.groupby('customer_id')['revenue'].sum().reset_index()
-    clv_df.columns = ['customer_id', 'lifetime_value']
-    plt.figure(figsize=(10, 5))
-    sns.histplot(clv_df['lifetime_value'], kde=True)
-    plt.title('Customer Lifetime Value Distribution')
-    plt.xlabel('Lifetime Value')
-    plt.ylabel('Frequency')
-    st.pyplot(plt)

 from pandas.tseries.offsets import MonthEnd
 from statsmodels.tsa.statespace.sarimax import SARIMAX
 from statsmodels.tsa.stattools import adfuller
+from sklearn.cluster import KMeans
+from sklearn.preprocessing import StandardScaler
 # Streamlit app
 st.title('Hotel Booking Analysis')
         st.write(f'## p-value: {result[1]}')
         # If the series is not stationary, take the first difference
+        if result[1] > 0.05:
+            monthly_revenue['adr_diff'] = monthly_revenue['adr'].diff().dropna()
         # Model parameters
         p = st.slider('AR order (p)', 0, 5, 1)
         # Make predictions
         forecast_steps = 12  # Forecast for the next 12 months
         forecast = model_fit.get_forecast(steps=forecast_steps)
+        forecast_index = pd.date_range(start=monthly_revenue['arrival_date'].max() + pd.DateOffset(months=1),
                                        periods=forecast_steps, freq='M')
         forecast_df = pd.DataFrame({'arrival_date': forecast_index,
         st.write("## Forecasted Revenue for the Next 12 Months")
         st.write(forecast_df.set_index('arrival_date'))
 elif options == 'Predict Booking Cancellations':
     st.header('Predict Booking Cancellations')
+    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
+    if uploaded_file is not None:
+        # Load the dataset
+        data = pd.read_csv(uploaded_file)
+        st.write("## Dataset Preview")
+        st.write(data.head())
+        # Load the trained model
+        with open('random_forest_model.pkl', 'rb') as file:
+            model = pickle.load(file)
+        st.write("## Provide input data to predict if a booking will be canceled.")
+        input_data = {}
+        for col in data.columns:
+            input_data[col] = st.text_input(f'{col}:', value='0')
+        input_df = pd.DataFrame(input_data, index=[0])
+        prediction = model.predict(input_df)
+        st.write('Prediction:', 'Canceled' if prediction[0] else 'Not Canceled')
 elif options == 'Market Segmentation':
     st.header('Market Segmentation')
+    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
+    if uploaded_file is not None:
+        # Load the dataset
+        data = pd.read_csv(uploaded_file)
+        st.write("## Dataset Preview")
+        st.write(data.head())
+        segmentation_features = data[['total_guests', 'total_of_special_requests', 'lead_time', 'is_repeated_guest']]
+        scaler = StandardScaler()
+        segmentation_features_scaled = scaler.fit_transform(segmentation_features)
+        kmeans = KMeans(n_clusters=4, random_state=42)
+        data['customer_segment'] = kmeans.fit_predict(segmentation_features_scaled)
+        st.write("## Customer Segmentation Results")
+        st.write(data[['customer_segment']].head())
+        plt.figure(figsize=(10, 5))
+        sns.scatterplot(x=segmentation_features_scaled[:, 0], y=segmentation_features_scaled[:, 1], hue=data['customer_segment'], palette='viridis')
+        plt.title('Customer Segmentation')
+        plt.xlabel('Total Guests (Standardized)')
+        plt.ylabel('Total Special Requests (Standardized)')
+        st.pyplot(plt)
 elif options == 'Customer Lifetime Value':
     st.header('Customer Lifetime Value')
+    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
+    if uploaded_file is not None:
+        # Load the dataset
+        data = pd.read_csv(uploaded_file)
+        st.write("## Dataset Preview")
+        st.write(data.head())
+        clv_df = data.groupby('customer_id')['revenue'].sum().reset_index()
+        clv_df.columns = ['customer_id', 'lifetime_value']
+        st.write("## Customer Lifetime Value Distribution")
+        plt.figure(figsize=(10, 5))
+        sns.histplot(clv_df['lifetime_value'], kde=True)
+        plt.title('Customer Lifetime Value Distribution')
+        plt.xlabel('Lifetime Value')
+        plt.ylabel('Frequency')
+        st.pyplot(plt)