File size: 9,994 Bytes
4796404
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d5a736a
 
 
4796404
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d5a736a
 
4796404
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4b5e3fd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
import io
import folium
import joblib
import datetime
import numpy as np
import gradio as gr
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt


# Persisted predictor and the scaler applied to its inputs before predict().
lgb_model = joblib.load("lgb_occupancy_model.pkl")
scaler = joblib.load("scaler.pkl")

# Historical demand table; it feeds both the plotted actuals and the lag
# features of the forecast.
model_df = pd.read_pickle("Cluster_Demand_model_df.pkl")

# Holiday calendar, kept as a set of midnight-normalized timestamps so that
# membership checks are cheap.
holiday_dates = set(
    pd.to_datetime(pd.read_json("holidays_2022_2025.json")['date']).dt.normalize()
)

# Column list used to reindex each feature row before it is scaled.
feature_columns = joblib.load('feature_columns.pkl')

# Property catalogue, trimmed to the attributes the app reads.
properties_df = pd.read_csv('CTVNS_Properties.csv')
properties_cols_to_keep = [
    'Property Name',
    'Property ID',
    'Star Rating',
    'Property Type',
    'Distance from Center',
    'Latitude',
    'Longitude',
]
properties_filtered_df = properties_df[properties_cols_to_keep].copy()

# Dropdown choices: every property name, rendered as a string.
property_options = properties_filtered_df['Property Name'].astype(str).tolist()

# Integer code for each property type. The codes equal each name's position
# in the alphabetically ordered list below.
# NOTE(review): presumably mirrors the label encoding used when the model was
# trained - confirm before adding or renaming a type.
property_type_mapping = {
    type_name: code
    for code, type_name in enumerate([
        'Apart-hotel',
        'Apartment',
        'BnB',
        'Cottage',
        'Farm House',
        'Guest House',
        'Holiday Home',
        'Homestay',
        'Hostel',
        'Hotel',
        'Lodge',
        'Resort',
        'Villa',
    ])
}

def forecast_by_property(property_name, adr):
    """Look up a property by name and forecast demand for its segment.

    Args:
        property_name: Value chosen in the property dropdown; compared against
            the string form of the ``'Property Name'`` column.
        adr: Average daily rate, forwarded unchanged to ``forecast``.

    Returns:
        Whatever ``forecast`` returns for the matched property. When no row
        matches, a 4-tuple ``(None, 0, 0, html_message)`` so the number of
        returned values still matches the four output components of the
        interface.
    """
    name_matches = properties_filtered_df['Property Name'].astype(str) == property_name
    selected_row = properties_filtered_df[name_matches]
    if selected_row.empty:
        # The UI declares four outputs (image, two numbers, HTML); returning a
        # single DataFrame here would fail when Gradio unpacks the result.
        return None, 0, 0, "<p>Property Name not found.</p>"

    # If several rows share the name, the first one wins.
    record = selected_row.iloc[0]
    star_rating = int(record['Star Rating'])
    property_type_str = record['Property Type']
    # Unknown property types map to -1, which matches no known category code.
    property_type_cat = property_type_mapping.get(property_type_str, -1)
    distance = float(record['Distance from Center'])
    # Pass plain floats rather than one-element Series: implicit
    # float(Series) conversion downstream is deprecated in pandas.
    lat = float(record['Latitude'])
    lon = float(record['Longitude'])

    return forecast(star_rating, property_type_cat, distance, lat, lon, property_name, adr)

def forecast_segment_all_features(starRating, propertyType_cat, distanceFromCenter, model_df, cutoff_date, end_date, scaler, lgb_model, full_feature_cols, X_train, holiday_dates, tolerance=0.1):
    """Forecast daily occupancy for one segment, one day at a time.

    History for the segment (same star rating and property type, distance
    within ``tolerance``) up to ``cutoff_date`` is laid out on a daily
    calendar that runs to ``end_date``. Every day after the cutoff is then
    predicted in order, and each prediction is written back so later days can
    use it in their lag and rolling-window features.

    Args:
        starRating: Star rating of the segment.
        propertyType_cat: Integer property-type code of the segment.
        distanceFromCenter: Distance from the centre of the segment.
        model_df: Historical frame with ``starRating``, ``propertyType_cat``,
            ``distanceFromCenter``, ``date`` and ``occupiedRooms`` columns.
        cutoff_date: Last date treated as known history.
        end_date: Last date to forecast.
        scaler: Object whose ``transform`` is applied to each feature row.
        lgb_model: Object whose ``predict`` returns the occupancy prediction.
        full_feature_cols: Ordered feature column names. When ``None`` the
            module-level ``feature_columns`` is used.
        X_train: Frame whose numeric column means impute missing features.
        holiday_dates: Collection of holiday timestamps (midnight-normalized).
        tolerance: Maximum absolute difference in ``distanceFromCenter`` for a
            history row to belong to the segment.

    Returns:
        DataFrame of the rows dated after ``cutoff_date`` with ``date``,
        ``occupied``, ``daily_change``, ``starRating``, ``distanceFromCenter``
        and ``propertyType_cat`` columns, or ``None`` when the segment has no
        history up to the cutoff.
    """
    cutoff_ts = pd.to_datetime(cutoff_date)
    # Honour the caller-supplied column list; previously this argument was
    # ignored and the module-level list was always used.
    feature_cols = feature_columns if full_feature_cols is None else list(full_feature_cols)

    cluster_hist = model_df[
        (model_df['starRating'] == starRating) &
        (model_df['propertyType_cat'] == propertyType_cat) &
        (np.abs(model_df['distanceFromCenter'] - distanceFromCenter) <= tolerance) &
        (model_df['date'] <= cutoff_ts)
    ].sort_values('date')

    if cluster_hist.empty:
        print(f"Warning: No historical data found for segment ({starRating}, {propertyType_cat}, {distanceFromCenter}) up to {cutoff_date}.")
        return None

    # NOTE(review): the lag/rolling logic below indexes by row position, so it
    # assumes at most one history row per date - TODO confirm for segments
    # where several distances fall within the tolerance.
    extended_series = pd.DataFrame({'date': pd.date_range(start=cluster_hist['date'].min(), end=end_date)})
    extended_series = extended_series.merge(cluster_hist[['date', 'occupiedRooms']], on='date', how='left').rename(columns={'occupiedRooms': 'occupied'})

    # Loop-invariant values, computed once instead of once per forecast day.
    base_year = model_df['date'].dt.year.min()
    fill_values = X_train.mean(numeric_only=True)

    # One-hot property-type flags for every ``prop_type_<n>`` column in the
    # feature list, so codes above 9 get a real 0/1 instead of being imputed.
    prefix = 'prop_type_'
    one_hot = {}
    for col in feature_cols:
        suffix = str(col)[len(prefix):]
        if str(col).startswith(prefix) and suffix.isdigit():
            one_hot[col] = 1 if int(suffix) == propertyType_cat else 0

    for i, current_date in enumerate(extended_series['date']):
        if current_date <= cutoff_ts:
            continue

        day_of_week = current_date.dayofweek
        day_of_year = current_date.timetuple().tm_yday
        month = current_date.month
        year = current_date.year

        # pandas numbers Monday as 0, so 4 and 5 are Friday and Saturday.
        # NOTE(review): confirm this matches the weekend definition used in training.
        is_weekend = 1 if day_of_week in (4, 5) else 0
        # Normalize so a non-midnight timestamp still matches the holiday set.
        is_holiday = 1 if current_date.normalize() in holiday_dates else 0

        # Lagged occupancy; for later forecast days these are earlier predictions.
        lags = {
            f'lag_{lag}': extended_series.at[i - lag, 'occupied'] if i - lag >= 0 else np.nan
            for lag in (1, 7, 15)
        }

        # Mean and population std over the preceding `window` days, truncated
        # at the start of the series.
        rolling_stats = {}
        for window in (3, 7, 15):
            window_data = extended_series['occupied'].iloc[max(i - window, 0):i]
            has_data = len(window_data) > 0
            rolling_stats[f'rolling_{window}_mean'] = window_data.mean() if has_data else np.nan
            rolling_stats[f'rolling_{window}_std'] = window_data.std(ddof=0) if has_data else np.nan

        # The current day's occupancy is still unknown at this point, so this
        # normally evaluates to NaN and is imputed below.
        today = extended_series.at[i, 'occupied']
        yesterday = extended_series.at[i - 1, 'occupied'] if i > 0 else np.nan
        daily_change = today - yesterday if pd.notnull(today) and pd.notnull(yesterday) else np.nan

        feature_vector = {
            'starRating': starRating,
            'distanceFromCenter': distanceFromCenter,
            'day_of_week_sin': np.sin(2 * np.pi * day_of_week / 7),
            'day_of_year_sin': np.sin(2 * np.pi * day_of_year / 365.25),
            'month_sin': np.sin(2 * np.pi * month / 12),
            'year_scaled': year - base_year,
            'is_weekend': is_weekend,
            'is_holiday': is_holiday,
            **lags,
            **rolling_stats,
            'daily_change': daily_change,
            **one_hot,
        }

        # Align to the expected column order, then impute gaps with the
        # training means before scaling.
        features = pd.DataFrame([feature_vector]).reindex(columns=feature_cols)
        features = features.fillna(fill_values)
        features_scaled = scaler.transform(features)
        pred = lgb_model.predict(features_scaled)[0]

        # Write the prediction back so later days can use it as a lag.
        extended_series.at[i, 'occupied'] = pred
        if i > 0:
            extended_series.at[i, 'daily_change'] = pred - extended_series.at[i - 1, 'occupied']

    future_df = extended_series[extended_series['date'] > cutoff_ts].copy()
    future_df['starRating'] = starRating
    future_df['distanceFromCenter'] = distanceFromCenter
    future_df['propertyType_cat'] = propertyType_cat
    return future_df



def forecast(starRating, propertyType_cat, distanceFromCenter, lat, lon, property_name, adr):
    """Plot recent actuals plus a 30-day forecast and summarise the demand.

    Args:
        starRating: Star rating of the segment.
        propertyType_cat: Integer property-type code of the segment.
        distanceFromCenter: Distance from the centre of the segment.
        lat: Latitude for the map marker (scalar or one-element Series).
        lon: Longitude for the map marker (scalar or one-element Series).
        property_name: Text shown as the marker tooltip.
        adr: Average daily rate used to turn room nights into revenue;
            ``None`` (an empty input field) is treated as 0.

    Returns:
        A 4-tuple ``(image, forecasted_rns, forecasted_revenue, map_html)``:
        a PIL image of the plot, the summed forecast room nights, that sum
        multiplied by ``adr``, and the folium map as HTML. When the segment
        has no history the tuple is ``(None, 0, 0, map_html)`` so the arity
        still matches the four UI outputs.
    """
    # Same distance tolerance as the forecast uses, so the plotted actuals and
    # the history behind the forecast describe the same segment.
    segment_tolerance = 0.1
    # NOTE(review): unexplained multiplier carried over from the original
    # code - confirm what the 1.75 scaling represents.
    occupancy_scale = 1.75

    # An empty ADR field arrives as None; avoid a TypeError in the revenue math.
    adr = 0 if adr is None else adr
    # Accept scalars or one-element Series for the coordinates.
    lat = float(np.ravel(lat)[0])
    lon = float(np.ravel(lon)[0])

    cutoff_date = datetime.datetime.today()
    start_date = cutoff_date - pd.Timedelta(days=30)
    end_date = cutoff_date + pd.Timedelta(days=30)

    # The map does not depend on the forecast, so build it first and return it
    # even when there is nothing to plot.
    folium_map = folium.Map(location=[lat, lon], zoom_start=15)
    folium.Marker([lat, lon], tooltip=property_name).add_to(folium_map)
    map_html = folium_map._repr_html_()

    # Last 30 days of actuals for the segment.
    in_segment = (
        (model_df['starRating'] == starRating) &
        (model_df['propertyType_cat'] == propertyType_cat) &
        (np.abs(model_df['distanceFromCenter'] - distanceFromCenter) <= segment_tolerance)
    )
    in_window = (model_df['date'] >= start_date) & (model_df['date'] <= cutoff_date)
    actual_df = model_df.loc[in_segment & in_window, ['date', 'occupiedRooms']].copy()
    actual_df = actual_df.rename(columns={'occupiedRooms': 'occupied'})
    actual_df['occupied'] = np.ceil(actual_df['occupied'] * occupancy_scale)
    actual_df['source'] = 'Actual'

    # Forecast the next 30 days.
    future_df = forecast_segment_all_features(
        starRating=starRating,
        propertyType_cat=propertyType_cat,
        distanceFromCenter=distanceFromCenter,
        model_df=model_df,
        cutoff_date=cutoff_date,
        end_date=end_date,
        scaler=scaler,
        lgb_model=lgb_model,
        full_feature_cols=None,
        X_train=model_df,
        holiday_dates=holiday_dates,
        tolerance=segment_tolerance,
    )

    if future_df is None:
        # No history for this segment: keep the four-output contract.
        return None, 0, 0, map_html

    future_df = future_df[['date', 'occupied']].copy()
    # NOTE(review): forecasts are rounded up before AND after scaling, while
    # actuals are rounded only after; kept as-is to preserve the numbers.
    future_df['occupied'] = np.ceil(np.ceil(future_df['occupied']) * occupancy_scale)
    future_df['source'] = 'Forecast'

    forecasted_rns = int(future_df['occupied'].sum())
    forecasted_revenue = int(forecasted_rns * adr)

    combined_df = pd.concat([actual_df, future_df], ignore_index=True)

    # Draw on an explicit figure instead of the global pyplot state, which is
    # shared between concurrent requests.
    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        for label, group in combined_df.groupby('source'):
            ax.plot(group['date'], group['occupied'], label=label, marker='o')
        ax.tick_params(axis='x', labelrotation=45)
        ax.set_xlabel("Date")
        ax.set_ylabel("Occupancy")
        ax.set_title("Hotel Occupancy: Last 30 Days (Actual) + Next 30 Days (Forecast)")
        ax.grid(True)
        ax.legend()
        fig.tight_layout()

        buf = io.BytesIO()
        fig.savefig(buf, format='png')
    finally:
        # Always release the figure, even if plotting fails.
        plt.close(fig)

    buf.seek(0)
    image = Image.open(buf)
    # Decode now so the image no longer depends on lazy reads from the buffer.
    image.load()

    return image, forecasted_rns, forecasted_revenue, map_html


# Gradio UI: a property dropdown and an ADR field feed forecast_by_property,
# whose four return values fill the plot, the two totals and the map.
demo = gr.Interface(
    fn=forecast_by_property,
    inputs=[
        gr.Dropdown(
            choices=property_options,
            label="Select Property",
            info="Choose from the list of properties (searchable)",
            interactive=True
        ),
        gr.Number(
            label="Average Daily Rate (ADR)",
            info="Enter the expected ADR in your currency",
            interactive=True
        )
    ],
    outputs=[
        gr.Image(type="pil", label="Forecast Plot"),
        gr.Number(label="Total Forecasted Room Nights", precision=0),
        gr.Number(label="Total Forecasted Revenue", precision=0),
        gr.HTML(label="Map")
    ],
    title="Hotel Occupancy Segment Forecast",
    description="Forecasts the next 30 days of occupancy for a selected hotel segment.",
    flagging_mode='never'
)

if __name__ == "__main__":
    # share=True asks Gradio to create a publicly shareable link as well as
    # serving the app locally.
    demo.launch(share=True)