|
import io |
|
import folium |
|
import joblib |
|
import datetime |
|
import numpy as np |
|
import gradio as gr |
|
import pandas as pd |
|
from PIL import Image |
|
import matplotlib.pyplot as plt |
|
|
|
|
|
|
|
# --- Fitted objects restored from disk --------------------------------------
# `lgb_model` is used through `.predict(...)` and `scaler` through
# `.transform(...)` further down in this file.
lgb_model = joblib.load("lgb_occupancy_model.pkl")
scaler = joblib.load("scaler.pkl")

# --- Historical demand data --------------------------------------------------
model_df = pd.read_pickle("Cluster_Demand_model_df.pkl")

# Holidays as a set of midnight-normalized timestamps, built from the 'date'
# field of the JSON file, so that membership tests are O(1).
holiday_dates = set(
    pd.to_datetime(pd.read_json("holidays_2022_2025.json")['date']).dt.normalize()
)

# Ordered list of feature names the scaler/model are fed with.
feature_columns = joblib.load('feature_columns.pkl')

# --- Property master data ----------------------------------------------------
properties_df = pd.read_csv('CTVNS_Properties.csv')
properties_cols_to_keep = [
    'Property Name',
    'Property ID',
    'Star Rating',
    'Property Type',
    'Distance from Center',
    'Latitude',
    'Longitude',
]
# Independent copy restricted to the columns the app actually reads.
properties_filtered_df = properties_df.loc[:, properties_cols_to_keep].copy()

# Choices offered by the property dropdown (names coerced to str).
property_options = list(properties_filtered_df['Property Name'].astype(str))
|
|
|
# (label, integer code) pairs for every supported property type.
# NOTE(review): the codes coincide with the alphabetical rank of the labels,
# which suggests they came from a label encoder -- confirm against the
# training pipeline before adding or renaming a type.
_PROPERTY_TYPE_CODES = (
    ('Hotel', 9),
    ('Homestay', 7),
    ('Guest House', 5),
    ('Resort', 11),
    ('Hostel', 8),
    ('BnB', 2),
    ('Villa', 12),
    ('Apartment', 1),
    ('Apart-hotel', 0),
    ('Holiday Home', 6),
    ('Cottage', 3),
    ('Lodge', 10),
    ('Farm House', 4),
)

# Lookup table: property-type label -> integer category code.
property_type_mapping = dict(_PROPERTY_TYPE_CODES)
|
|
|
def forecast_by_property(property_name, adr):
    """Look up a property by name and forecast occupancy for its segment.

    The first row of ``properties_filtered_df`` whose 'Property Name'
    (compared as a string) equals ``property_name`` supplies the star rating,
    property type, distance from centre and coordinates, which are then
    handed to ``forecast``.

    Args:
        property_name: Name selected in the UI dropdown.
        adr: Average daily rate, forwarded unchanged to ``forecast``.

    Returns:
        Whatever ``forecast`` returns (plot image, forecasted room nights,
        forecasted revenue, map HTML).

    Raises:
        gr.Error: If no property carries the given name. The interface
            declares four outputs, so returning a single error frame would
            itself fail inside Gradio; raising shows the message to the user.
    """
    names = properties_filtered_df['Property Name'].astype(str)
    matches = properties_filtered_df[names == property_name]
    if matches.empty:
        raise gr.Error('Property Name not found.')

    # Work on one record so every attribute below is a scalar, not a Series.
    record = matches.iloc[0]

    star_rating = int(record['Star Rating'])
    # Unknown property types map to -1, which matches no category code.
    property_type_cat = property_type_mapping.get(record['Property Type'], -1)
    distance = float(record['Distance from Center'])
    # Plain floats: the map needs numeric coordinates, not one-row Series.
    lat = float(record['Latitude'])
    lon = float(record['Longitude'])

    return forecast(star_rating, property_type_cat, distance, lat, lon, property_name, adr)
|
|
|
def forecast_segment_all_features(starRating, propertyType_cat, distanceFromCenter, model_df, cutoff_date, end_date, scaler, lgb_model, full_feature_cols, X_train, holiday_dates, tolerance=0.1):
    """Recursively forecast daily occupancy for one property segment.

    History is taken from ``model_df`` rows with the same star rating and
    property-type code, a distance within ``tolerance`` of
    ``distanceFromCenter`` and a date not after ``cutoff_date``. Every day
    after the cutoff up to ``end_date`` is then predicted one at a time, and
    each prediction is written back so that it feeds the lag and rolling
    features of the following days.

    Args:
        starRating: Star rating of the segment.
        propertyType_cat: Integer property-type code of the segment.
        distanceFromCenter: Distance from the centre of the segment.
        model_df: History with columns 'starRating', 'propertyType_cat',
            'distanceFromCenter', 'date' (datetime) and 'occupiedRooms'.
        cutoff_date: Last date treated as history.
        end_date: Last date to forecast.
        scaler: Object exposing ``transform(features)``.
        lgb_model: Object exposing ``predict(scaled_features)``.
        full_feature_cols: Ordered feature names fed to ``scaler``; when
            ``None`` the module-level ``feature_columns`` is used.
        X_train: DataFrame whose numeric column means fill missing features.
        holiday_dates: Container of midnight-normalized timestamps, tested
            with ``in``.
        tolerance: Maximum absolute distance difference for a history row to
            belong to the segment.

    Returns:
        DataFrame with one row per forecast day ('date', 'occupied',
        'daily_change' once a day has been predicted, plus the three segment
        identifiers), or ``None`` when the segment has no history.
    """
    cutoff_ts = pd.to_datetime(cutoff_date)

    in_segment = (
        (model_df['starRating'] == starRating)
        & (model_df['propertyType_cat'] == propertyType_cat)
        & (np.abs(model_df['distanceFromCenter'] - distanceFromCenter) <= tolerance)
        & (model_df['date'] <= cutoff_date)
    )
    cluster_hist = model_df[in_segment].sort_values('date')

    if cluster_hist.empty:
        print(f"Warning: No historical data found for segment ({starRating}, {propertyType_cat}, {distanceFromCenter}) up to {cutoff_date}.")
        return None

    # Honour the caller-supplied column list; fall back to the module-level
    # one only when the caller passes None.
    feature_cols = list(feature_columns if full_feature_cols is None else full_feature_cols)

    # One-hot flags for every 'prop_type_<code>' column the model expects
    # (not just codes 0-9), computed once because they never change.
    prefix = 'prop_type_'
    prop_type_flags = {}
    for col in feature_cols:
        col_name = str(col)
        if col_name.startswith(prefix):
            suffix = col_name[len(prefix):]
            is_active = suffix.isdigit() and int(suffix) == propertyType_cat
            prop_type_flags[col] = 1 if is_active else 0

    # Loop invariants, hoisted out of the per-day loop.
    fill_values = X_train.mean(numeric_only=True)
    base_year = model_df['date'].dt.year.min()

    # Several clusters can fall inside `tolerance`; collapse them to a single
    # row per date (mean) so that "row i - k" really is "k days earlier".
    daily_hist = cluster_hist.groupby('date', as_index=False)['occupiedRooms'].mean()

    extended_series = pd.DataFrame({'date': pd.date_range(start=daily_hist['date'].min(), end=end_date)})
    extended_series = extended_series.merge(daily_hist, on='date', how='left').rename(columns={'occupiedRooms': 'occupied'})

    for i, current_date in enumerate(extended_series['date'].tolist()):
        if current_date <= cutoff_ts:
            continue  # history row, nothing to predict

        day_of_week = current_date.dayofweek
        day_of_year = current_date.timetuple().tm_yday
        month = current_date.month
        year = current_date.year

        # pandas dayofweek: Monday=0, so 4 and 5 are Friday and Saturday.
        # NOTE(review): must match the weekend definition used in training.
        is_weekend = 1 if day_of_week in (4, 5) else 0
        is_holiday = 1 if current_date.normalize() in holiday_dates else 0

        day_of_week_sin = np.sin(2 * np.pi * day_of_week / 7)
        day_of_year_sin = np.sin(2 * np.pi * day_of_year / 365.25)
        month_sin = np.sin(2 * np.pi * month / 12)
        year_scaled = year - base_year

        # Lagged occupancy; earlier predictions are read back from the frame.
        lags = {
            f'lag_{lag}': extended_series.at[i - lag, 'occupied'] if i - lag >= 0 else np.nan
            for lag in (1, 7, 15)
        }

        # Rolling statistics over the `window` rows preceding row i.
        rolling_stats = {}
        for window in (3, 7, 15):
            window_data = extended_series['occupied'].iloc[max(i - window, 0):i]
            has_data = len(window_data) > 0
            rolling_stats[f'rolling_{window}_mean'] = window_data.mean() if has_data else np.nan
            rolling_stats[f'rolling_{window}_std'] = window_data.std(ddof=0) if has_data else np.nan

        # Row i has no value yet (history stops at the cutoff), so this is NaN
        # for forecast rows and the X_train mean fallback below applies.
        current_value = extended_series.at[i, 'occupied']
        previous_value = extended_series.at[i - 1, 'occupied'] if i > 0 else np.nan
        if i > 0 and pd.notnull(previous_value) and pd.notnull(current_value):
            daily_change = current_value - previous_value
        else:
            daily_change = np.nan

        feature_vector = {
            'starRating': starRating,
            'distanceFromCenter': distanceFromCenter,
            'day_of_week_sin': day_of_week_sin,
            'day_of_year_sin': day_of_year_sin,
            'month_sin': month_sin,
            'year_scaled': year_scaled,
            'is_weekend': is_weekend,
            'is_holiday': is_holiday,
            **lags,
            **rolling_stats,
            'daily_change': daily_change,
            **prop_type_flags,
        }

        features = pd.DataFrame([feature_vector]).reindex(columns=feature_cols)
        features = features.fillna(fill_values)
        features_scaled = scaler.transform(features)
        pred = lgb_model.predict(features_scaled)[0]

        # Feed the prediction back so later days can use it as a lag.
        extended_series.at[i, 'occupied'] = pred
        if i > 0:
            extended_series.at[i, 'daily_change'] = pred - extended_series.at[i - 1, 'occupied']

    future_df = extended_series[extended_series['date'] > cutoff_ts].copy()
    future_df['starRating'] = starRating
    future_df['distanceFromCenter'] = distanceFromCenter
    future_df['propertyType_cat'] = propertyType_cat
    return future_df
|
|
|
|
|
|
|
def forecast(starRating, propertyType_cat, distanceFromCenter, lat, lon, property_name, adr):
    """Build the 30-day-back / 30-day-ahead occupancy view for one segment.

    Actual occupancy of the last 30 days is read from ``model_df``; the next
    30 days are predicted with ``forecast_segment_all_features``. Both series
    are drawn into one chart, and a map centred on the property is rendered.

    Args:
        starRating: Star rating of the segment.
        propertyType_cat: Integer property-type code of the segment.
        distanceFromCenter: Distance from the centre of the segment.
        lat: Latitude of the property (scalar or one-element array-like).
        lon: Longitude of the property (scalar or one-element array-like).
        property_name: Label shown as the map marker tooltip.
        adr: Average daily rate used to turn room nights into revenue.

    Returns:
        Tuple ``(image, forecasted_rns, forecasted_revenue, map_html)``: the
        chart as a PIL image, the summed forecast occupancy as int, that sum
        times ``adr`` as int, and the map as an HTML string.

    Raises:
        gr.Error: If ``adr`` is missing or the segment has no history.
    """
    if adr is None:
        # An empty number field arrives as None; fail with a readable message
        # instead of a TypeError in the revenue multiplication.
        raise gr.Error("Please enter an Average Daily Rate (ADR).")

    # Same distance tolerance for actuals and forecast, so both describe the
    # same set of history rows.
    segment_tolerance = 0.1

    cutoff_date = datetime.datetime.today()
    start_date = cutoff_date - pd.Timedelta(days=30)
    end_date = cutoff_date + pd.Timedelta(days=30)

    in_window = (
        (model_df['starRating'] == starRating)
        & (model_df['propertyType_cat'] == propertyType_cat)
        & (np.abs(model_df['distanceFromCenter'] - distanceFromCenter) <= segment_tolerance)
        & (model_df['date'] >= start_date)
        & (model_df['date'] <= cutoff_date)
    )
    # One row per date (mean across matching rows), in chronological order.
    actual_df = (
        model_df.loc[in_window, ['date', 'occupiedRooms']]
        .groupby('date', as_index=False)['occupiedRooms'].mean()
        .rename(columns={'occupiedRooms': 'occupied'})
    )
    # NOTE(review): 1.75 is an undocumented scaling factor -- confirm meaning.
    actual_df['occupied'] = np.ceil(actual_df['occupied'] * 1.75)
    actual_df['source'] = 'Actual'

    future_df = forecast_segment_all_features(
        starRating=starRating,
        propertyType_cat=propertyType_cat,
        distanceFromCenter=distanceFromCenter,
        model_df=model_df,
        cutoff_date=cutoff_date,
        end_date=end_date,
        scaler=scaler,
        lgb_model=lgb_model,
        full_feature_cols=None,
        X_train=model_df,
        holiday_dates=holiday_dates,
        tolerance=segment_tolerance,
    )

    if future_df is None:
        # Four outputs are expected on success; surface the problem to the UI
        # rather than returning a tuple of a different shape.
        raise gr.Error("No historical data found for the selected property segment.")

    future_df = future_df[['date', 'occupied']].copy()
    # Predictions are rounded up to whole units first, then scaled, then
    # rounded up again.
    future_df['occupied'] = np.ceil(np.ceil(future_df['occupied']) * 1.75)
    future_df['source'] = 'Forecast'

    forecasted_rns = int(future_df['occupied'].sum())
    forecasted_revenue = int(forecasted_rns * adr)

    combined_df = pd.concat([actual_df, future_df], ignore_index=True)

    # Use an explicit figure instead of pyplot's implicit "current figure",
    # and always close it, even if drawing fails.
    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        for label, series_df in combined_df.groupby('source'):
            ax.plot(series_df['date'], series_df['occupied'], label=label, marker='o')

        ax.tick_params(axis='x', labelrotation=45)
        ax.set_xlabel("Date")
        ax.set_ylabel("Occupancy")
        ax.set_title("Hotel Occupancy: Last 30 Days (Actual) + Next 30 Days (Forecast)")
        ax.grid(True)
        ax.legend()

        buf = io.BytesIO()
        fig.tight_layout()
        fig.savefig(buf, format='png')
    finally:
        plt.close(fig)

    buf.seek(0)
    image = Image.open(buf)
    image.load()  # decode now so the image no longer depends on the buffer

    # Accept scalars as well as one-element Series/arrays for the coordinates.
    lat = float(np.asarray(lat, dtype=float).ravel()[0])
    lon = float(np.asarray(lon, dtype=float).ravel()[0])

    folium_map = folium.Map(location=[lat, lon], zoom_start=15)
    folium.Marker([lat, lon], tooltip=property_name).add_to(folium_map)
    map_html = folium_map._repr_html_()

    return image, forecasted_rns, forecasted_revenue, map_html
|
|
|
|
|
# Gradio front end: a property dropdown and an ADR field go into
# `forecast_by_property`; its four results are shown as a chart, two numbers
# and an HTML map. (A bare `model_df.columns` expression that used to precede
# this block had no effect outside a notebook and was dropped.)
demo = gr.Interface(
    fn=forecast_by_property,
    inputs=[
        gr.Dropdown(
            choices=property_options,
            label="Select Property",
            info="Choose from the list of properties (searchable)",
            interactive=True,
        ),
        gr.Number(
            label="Average Daily Rate (ADR)",
            info="Enter the expected ADR in your currency",
            interactive=True,
        ),
    ],
    outputs=[
        gr.Image(type="pil", label="Forecast Plot"),
        gr.Number(label="Total Forecasted Room Nights", precision=0),
        gr.Number(label="Total Forecasted Revenue", precision=0),
        gr.HTML(label="Map"),
    ],
    title="Hotel Occupancy Segment Forecast",
    description="Forecasts the next 30 days of occupancy for a selected hotel segment.",
    flagging_mode='never',
)


if __name__ == "__main__":
    # share=True asks Gradio for a publicly reachable link to the app.
    demo.launch(share=True)
|
|