Spaces:

danielvarga
/

pq

Sleeping

File size: 4,312 Bytes

0ea2c50
e9cac80
 
 
7c98739
0ea2c50
8ec3059
e9cac80
 
0ea2c50
 
 
e9cac80
7c98739
 
 
e9cac80
 
8ec3059
0ea2c50
 
7c98739
 
e9cac80
969cffd
 
 
 
 
0ea2c50
e9cac80
0ea2c50
 
 
 
 
8ec3059
0ea2c50
 
 
 
969cffd
e9cac80
8ec3059
 
 
 
 
 
 
969cffd
8ec3059
 
e9cac80
0ea2c50
 
969cffd
 
 
0ea2c50
 
 
 
 
 
 
 
 
 
969cffd
0ea2c50
 
 
e9cac80
0ea2c50
 
969cffd
8ec3059
0ea2c50
e9cac80
 
0ea2c50
 
 
 
e9cac80
 
 
0ea2c50
 
 
 
 
 
 
 
 
 
 
 
969cffd
 
e9cac80
 
0ea2c50
 
e9cac80
0ea2c50
e9cac80
 
0ea2c50
 
 
969cffd
8ec3059
0ea2c50
 
 
e9cac80
0ea2c50

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from prophet import Prophet
import holidays
import logging
from sklearn.metrics import mean_absolute_error


# kW
PREDICTION_LOWER_BOUND = 0 # 15
print("do not forget about hardwired prediction lower bound", PREDICTION_LOWER_BOUND, "kW")

hungarian_holidays = holidays.Hungary(years=range(2019, 2031))
HOLIDAY_DF = pd.DataFrame(list(hungarian_holidays.items()), columns=['ds', 'holiday'])



def prophet_backend(train_data, forecast_horizon):
    # Initialize and train the Prophet model using the training data
    model = Prophet(seasonality_mode='multiplicative', growth='flat',
        yearly_seasonality=False, weekly_seasonality=True, daily_seasonality=True,
        holidays=HOLIDAY_DF)

    # we can also play with setting daily_seasonality=False above, and then manually adding
    # model.add_seasonality("daily", 1, fourier_order=10, prior_scale=100, mode="multiplicative")
    # ...it didn't really work though. bumping the fourier_order helps, but makes the model slow.
    # the rest didn't have much effect.

    model.fit(train_data)

    # Create a DataFrame with future timestamps for the evaluation period
    future = model.make_future_dataframe(periods=forecast_horizon, freq='15T', include_history=False)

    # Make predictions for the evaluation period
    forecast = model.predict(future)
    assert len(forecast) == forecast_horizon

    for key in ('yhat', 'yhat_lower', 'yhat_upper'):
        forecast[key] = np.maximum(forecast[key], PREDICTION_LOWER_BOUND)

    return forecast, model


def prediction_task(backend, df, split_date, forecast_horizon):
    # Split the data into training (past) and evaluation (future) sets
    train_data = df[df['ds'] <= split_date]
    eval_data = df[df['ds'] > split_date]
    eval_data = eval_data.head(forecast_horizon)

    forecast, model = backend(train_data, forecast_horizon)

    mae = mean_absolute_error(eval_data['y'], forecast['yhat'])

    do_vis = False
    if do_vis:
        future = model.make_future_dataframe(periods=forecast_horizon, freq='15T', include_history=True)
        forecast = model.predict(future)

        plt.figure(figsize=(12, 6))
        plt.plot(eval_data['ds'], eval_data['y'], label='Actual', color='blue')
        plt.plot(forecast['ds'], forecast['yhat'], label='Predicted', color='red')
        plt.fill_between(forecast['ds'], forecast['yhat_lower'], forecast['yhat_upper'], color='pink', alpha=0.5, label='Uncertainty')
        plt.xlabel('Timestamp')
        plt.ylabel('Value')
        plt.title('Actual vs. Predicted Values')
        plt.legend()
        plt.grid(True)
        plt.show()

        fig1 = model.plot(forecast)
        plt.plot(eval_data['ds'], eval_data['y'], c='r')
        plt.show()

        fig2 = model.plot_components(forecast)
        plt.show()
        exit()

    return mae, eval_data['y'].mean()


logger = logging.getLogger('cmdstanpy')
logger.addHandler(logging.NullHandler())
logger.propagate = False
logger.setLevel(logging.CRITICAL)



cons_filename = 'pq_terheles_2021_adatok.tsv'

df = pd.read_csv(cons_filename, sep='\t', skipinitialspace=True, na_values='n/a', decimal=',')
df['Time'] = pd.to_datetime(df['Korrigált időpont'], format='%m/%d/%y %H:%M')
df = df.set_index('Time')
df['Consumption'] = df['Hatásos teljesítmény [kW]']

df['ds'] = df.index
df['y'] = df['Consumption']


# TODO 15 minutes timestep hardwired!
forecast_horizon = 24 * 4
print("forecast horizon", forecast_horizon // 4, "hours")


start_date = '2021-06-01'
end_date = '2021-10-24'

weekly_date_range = pd.date_range(start=start_date, end=end_date, freq='8d')


maes = []
mean_values = []
for split_date in weekly_date_range:
    # prophet_backend is the only backend currently
    mae, mean_value = prediction_task(prophet_backend, df, split_date, forecast_horizon)
    maes.append(mae)
    mean_values.append(mean_value)
    print(split_date, "Mean Absolute Error", mae, "MAE/true mean", mae / mean_value)

maes = np.array(maes)
mean_values = np.array(mean_values)
aggregate_mae = maes.mean()
print("Mean Absolute Error over whole date range", weekly_date_range[0], "-", weekly_date_range[-1], ":", aggregate_mae)
print("Mean Absolute Error / true mean over whole date range", aggregate_mae / mean_values.mean())