Spaces:
Runtime error
Runtime error
from typing import List | |
import logging | |
import pandas as pd | |
from statsmodels.tsa.tsatools import freq_to_period | |
from sklearn.metrics import mean_squared_error | |
from math import sqrt | |
from .models import AllModels | |
# Configure the root logger when this module is imported; DEBUG is the level
# used by every logging.debug() trace emitted by Forecaster below.
logging.basicConfig(level=logging.DEBUG)
class Forecaster():
    '''
    Run the forecasting models provided by models.AllModels on one series.

    forecast() is the entry point. After it returns, self.results holds one
    dict per model with the keys "model", "result", "evaluate" and "rmse".
    '''

    def __init__(
        self,
    ) -> None:
        logging.debug('Forecaster init')
        self.models = {}  # Init models dict
        # Declare all per-run state up front so the helper methods do not
        # fail with AttributeError when they are used before forecast()
        self._reset_state()

    def _reset_state(self, enable_exog: bool = True) -> None:
        '''
        Clear everything a previous forecast() run left on the instance
        '''
        self.enable_exog = enable_exog
        # Below properties will be init by prep_data()
        self.data = None
        self.y = None
        self.exog = None
        self.freq = None
        self.period = None
        self.y_test = None  # init by train_test_split()
        self.n_predict = None  # init by calculate_n_predict()
        self.kwargs = {}  # init by init_kwargs()
        self.results = []  # Contains all result value

    def fit(self, data):
        '''
        Fit data into the forecaster

        Only stores the data on the instance. Note that forecast() works on
        its own "data" argument and overwrites self.data.
        '''
        self.data = data

    def forecast(
        self,
        data: pd.DataFrame,
        # Quoted on purpose: the previous `str or List[str]` evaluated to
        # plain `str`, and the quoted union needs no additional import
        models: 'str | List[str]' = 'all',
        test: bool = False,
        enable_exog: bool = True
    ):
        '''
        Main function, will perform the entire forecast operation

        data : pd.DataFrame, required
            Data for training the model, must contain "datetime", "y" columns,
            any additional column will be considered as exogenous columns and
            be used for multivariate forecasting
            data must be cleaned without any missing value
            data's datetime column must be valid datetime strings, and its
            frequency must be inferable (see prep_data)

        models : str or List[str], default='all'
            Selected model(s) to use for forecasting. Default is "all",
            which will use all available models registered in models.AllModels

        test : bool, default=False
            Decide if the forecasting purpose is for testing or actual
            prediction. Testing and prediction will not happen at the same
            time. When testing, the last n_predict values of y are held out
            and an RMSE against them is stored for every model

        enable_exog : bool, default=True
            If disabled, exog data will not be used in the model training, and
            the data will be considered as univariate data
            If enabled, and the data does contain exog columns, the exog index
            is shifted forward by n_predict steps (see shift_exog):
            1. each y value gets aligned with the exog row recorded n_predict
               units of time earlier
            2. the n_predict oldest y values are trimmed off
            3. the last n_predict exog rows reach past the end of y; they are
               handed to the models together with the rest of exog

        Returns nothing; the outcome is stored in self.results.

        Raises ValueError when the frequency of the datetime column cannot
        be inferred.
        '''
        logging.debug('Start forecasting ...')
        self._reset_state(enable_exog)

        # Prepare data, including set the datetime index, split y and exog columns
        self.prep_data(data)
        # Calculate n_predict value based on self.period
        self.calculate_n_predict()
        # Init the basic kwargs for models to use
        self.init_kwargs()
        # Shift exog value by n_predict unit of time
        self.shift_exog()

        # Split test set for testing purpose
        if test:
            logging.debug('Testing ...')
            self.train_test_split()

        # ================================ #
        # Train models and make prediction #
        # ================================ #
        self.init_models(models)
        for model_name, model in self.models.items():
            result = self._build_result(model_name, model.forecast())
            if test:
                # Score the prediction against the held-out tail of y
                mse = mean_squared_error(self.y_test, result['result'])
                result['rmse'] = sqrt(mse)
            self.results.append(result)
        # - END of forecast - #

    @staticmethod
    def _build_result(model_name, fcst):
        '''
        Wrap the raw output of one model into the common result dict

        fcst is either a keyed container holding "forecast" (and optionally
        "evaluate"), or the forecast itself.
        '''
        result = {
            'model': model_name,
            'result': fcst,
            'evaluate': None,
            'rmse': None,
        }
        # A bare list / array has no keys(); treat it as the forecast itself
        # instead of crashing on the attribute lookup
        keys = fcst.keys() if hasattr(fcst, 'keys') else ()
        if 'forecast' in keys:
            result['result'] = fcst['forecast']
        if 'evaluate' in keys:
            result['evaluate'] = fcst['evaluate']
        return result

    def init_models(self, models):
        '''
        Initialize models based on the provided parameter.
        Get self.models ready for forecasting
        '''
        logging.debug('Init models')
        all_models = AllModels(models)
        self.models = all_models.init_models(
            self.y,
            self.n_predict,
            self.exog,
            **self.kwargs)

    def prep_data(
        self,
        data: pd.DataFrame
    ) -> None:
        '''
        Copy data, index it by its "datetime" column and split y from exog

        Sets self.data, self.freq, self.period and self.y. self.exog is set
        only when there are columns besides "y" and self.enable_exog is True.
        The caller's DataFrame is not modified.

        Raises ValueError when the frequency of the index cannot be inferred.
        '''
        logging.debug('Prep data')
        self.data = data.copy()
        self.data.set_index('datetime', inplace=True)
        self.data.index = pd.to_datetime(self.data.index)

        logging.debug('Inferencing freq and period')
        self.freq = pd.infer_freq(self.data.index)
        if self.freq is None:
            # Fail here with a readable message rather than deep inside
            # freq_to_period
            raise ValueError(
                'Unable to infer the frequency of the "datetime" column; '
                'the data must be regularly spaced')
        self.period = freq_to_period(self.freq)

        self.y = self.data['y']
        if len(self.data.columns) > 1 and self.enable_exog:
            self.exog = self.data.drop(columns='y')

    def calculate_n_predict(self):
        '''
        The n_predict will be the smaller number in 20, self.period value
        By default, try only predict 1 seasonal cycle

        The value is further capped at 20% of the data size, and is never
        smaller than 4 (the minimum wins over the cap).
        '''
        # Set a max prediction size to be 20% of given data size
        max_size = int(len(self.data)*0.2)
        n_predict = min(20, self.period, max_size)
        # Set a min prediction to be 4
        self.n_predict = max(n_predict, 4)

    def init_kwargs(self):
        '''
        kwargs will be used for initializing models.
        kwargs contains all necessary information about the data
        '''
        self.kwargs['period'] = self.period

    def train_test_split(self):
        '''
        n_predict length of y value will be split out for testing
        although, each model will probably have its own cross validator

        exog, when present, loses its last n_predict rows as well.
        '''
        logging.debug('Train test split')
        self.y_test = self.y[-self.n_predict:]
        self.y = self.y[:-self.n_predict]
        if self.exog is not None:
            self.exog = self.exog[:-self.n_predict]

    def shift_exog(self):
        '''
        Move the exog index n_predict periods forward and trim y to match

        Does nothing when there is no exog data.
        '''
        if self.exog is not None:
            logging.debug('Shifted exog datetime index by n_predict period')
            self.exog.index = self.exog.index.shift(
                self.n_predict, freq=self.freq)
            logging.debug(
                'Trimmed y by n_predict, so it is aligned with shifted exog')
            self.y = self.y[self.n_predict:]