Spaces:

zq13648
/

demand-forecasting

Runtime error

demand-forecasting / src /forecaster /forecaster_old.py

zhang qiao

Upload folder using huggingface_hub

8cf4695 about 1 year ago

6.67 kB

	import logging
	from math import sqrt
	from typing import List
	from typing import Union

	import pandas as pd
	from sklearn.metrics import mean_squared_error
	from statsmodels.tsa.tsatools import freq_to_period

	from .models import AllModels

	# Configure the root logger at import time with the DEBUG threshold, so the
	# logging.debug() calls made in this module are emitted.
	# NOTE(review): this is a module-level side effect that applies to any
	# program importing this module - confirm that is intended.
	logging.basicConfig(level=logging.DEBUG)


	class Forecaster():
	    '''
	    Orchestrates one forecasting run: prepares the input frame, decides the
	    forecast horizon, aligns exogenous columns, optionally holds out a test
	    set, then runs every selected model and collects the results.

	    After forecast() returns, the per-model results are in self.results.
	    '''

	    # Bounds used by calculate_n_predict()
	    MAX_N_PREDICT = 20         # never predict more than this many steps
	    MIN_N_PREDICT = 4          # never predict fewer than this many steps
	    MAX_PREDICT_RATIO = 0.2    # cap the horizon at this share of the data

	    def __init__(
	        self,
	    ) -> None:
	        logging.debug('Forecaster init')

	        self.models = {}  # Init models dict
	        self.enable_exog = True

	        # Create every run-state attribute up front so the helper methods
	        # can be used (and inspected) before forecast() has been called.
	        self._reset_state()

	    def _reset_state(self) -> None:
	        '''
	        Reset all attributes that belong to a single forecast run.
	        '''
	        # Below properties will be init by prep_data()
	        self.data = None      # pd.DataFrame indexed by datetime
	        self.y = None         # target series
	        self.exog = None      # exogenous columns, or None when univariate
	        self.freq = None      # frequency string inferred from the index
	        self.period = None    # seasonal period derived from freq

	        self.y_test = None    # init by train_test_split()

	        self.n_predict = None  # init by calculate_n_predict()

	        self.kwargs = {}       # init by init_kwargs()

	        self.results = []      # Contains all result value

	    def fit(self, data):
	        '''
	        Fit data into the forecaster.

	        Only stores the data on the instance; forecast() takes its own
	        data argument and does not read what is stored here.
	        '''
	        self.data = data

	    def forecast(
	        self,
	        data: pd.DataFrame,
	        models: Union[str, List[str]] = 'all',
	        test: bool = False,
	        enable_exog: bool = True
	    ):
	        '''
	        Main function, will perform the entire forecast operation.
	        Results are stored in self.results (one dict per model with the keys
	        "model", "result", "evaluate" and "rmse"); nothing is returned.

	        data : pd.DataFrame, required
	            Data for training the model, must contain "datetime", "y" columns, any additional column
	            will be considered as exogenous columns and be used for multivariate forecasting
	            data must be cleaned without any missing value
	            data's datetime column must be valid datetime strings, the frequency must be inferable

	        models : str or List[str], default='all'
	            Selected model(s) to use for forecasting. Default is "all".
	            The value is passed unchanged to models.AllModels.

	        test : bool, default=False
	            Decide if the forecasting purpose is for testing or actual prediction
	            Testing and prediction will not happen at the same time. When enabled,
	            the last n_predict observations of y are held out and each model's
	            output is scored against them with RMSE

	        enable_exog : bool, default=True
	            If disabled, exog data will not be used in the model training, and the data will be considered as univariate data
	            If enabled, and the data does contain exog data, for multivariate forecasting purpose, the data must be shifted
	            by n_predict steps. This will cause a few things:
	            1. y column will be remapped to exog data that is n_predict unit of time ago
	            2. n_predict length of the oldest y will be trimmed off
	            3. n_predict length of exog values will be used for the forecasting

	        Raises KeyError when a required column is missing and ValueError when
	        the frequency of the datetime column cannot be inferred.
	        '''
	        logging.debug('Start forecasting ...')

	        self.enable_exog = enable_exog

	        # Start every run from a clean state so nothing leaks from a previous call
	        self._reset_state()

	        # Prepare data, including set the datetime index, split y and exog columns
	        self.prep_data(data)

	        # Calculate n_predict value based on self.period
	        self.calculate_n_predict()

	        # Init the basic kwargs for models to use
	        self.init_kwargs()

	        # Shift exog value by n_predict unit of time
	        self.shift_exog()

	        # Split test set for testing purpose
	        if test:
	            logging.debug('Testing ...')
	            self.train_test_split()

	        # ================================ #
	        # Train models and make prediction #
	        # ================================ #

	        self.init_models(models)

	        for model_name, model in self.models.items():
	            self.results.append(self._run_model(model_name, model, test))

	        # - END of forecast - #

	    def _run_model(self, model_name, model, test: bool) -> dict:
	        '''
	        Run a single model and package its output as a result dict.
	        '''
	        result = {
	            'model': model_name,
	            'result': None,
	            'evaluate': None,
	            'rmse': None,
	        }

	        fcst = model.forecast()
	        fcst_keys = fcst.keys()

	        # A model may return either a mapping holding a "forecast" entry
	        # or the forecast itself; accept both shapes.
	        if 'forecast' in fcst_keys:
	            result['result'] = fcst['forecast']
	        else:
	            result['result'] = fcst

	        if 'evaluate' in fcst_keys:
	            result['evaluate'] = fcst['evaluate']

	        if test:
	            # Score against the held-out tail created by train_test_split()
	            mse = mean_squared_error(self.y_test, result['result'])
	            result['rmse'] = sqrt(mse)

	        return result

	    def init_models(self, models):
	        '''
	        Initialize models based on the provided parameter.
	        Get self.models ready for forecasting.

	        self.models is whatever AllModels.init_models returns; forecast()
	        iterates it with .items() and calls .forecast() on each value.
	        '''
	        logging.debug('Init models')

	        all_models = AllModels(models)

	        self.models = all_models.init_models(
	            self.y,
	            self.n_predict,
	            self.exog,
	            **self.kwargs)

	    def prep_data(
	        self,
	        data: pd.DataFrame
	    ) -> None:
	        '''
	        Copy the input, index it by datetime, infer frequency and seasonal
	        period, and split it into self.y and (optionally) self.exog.

	        Raises KeyError when "datetime" or "y" is missing and ValueError
	        when pandas cannot infer a frequency from the datetime values.
	        '''
	        logging.debug('Prep data')

	        # Fail early with a readable message instead of a bare pandas KeyError
	        missing = [col for col in ('datetime', 'y') if col not in data.columns]
	        if missing:
	            raise KeyError(
	                'data is missing required column(s): ' + ', '.join(missing))

	        self.data = data.copy()
	        self.data.set_index('datetime', inplace=True)
	        self.data.index = pd.to_datetime(self.data.index)

	        logging.debug('Inferencing freq and period')
	        self.freq = pd.infer_freq(self.data.index)
	        if self.freq is None:
	            # infer_freq returns None for irregular timestamps; passing that
	            # on to freq_to_period would only produce an obscure failure.
	            raise ValueError(
	                'Unable to infer the frequency of the "datetime" column; '
	                'the timestamps must be regularly spaced')
	        self.period = freq_to_period(self.freq)

	        self.y = self.data['y']

	        if len(self.data.columns) > 1 and self.enable_exog:
	            self.exog = self.data.drop(columns='y')
	        else:
	            # Explicitly univariate, also when prep_data() is called again
	            self.exog = None

	    def calculate_n_predict(self):
	        '''
	        The n_predict will be the smaller number in MAX_N_PREDICT (20) and
	        the self.period value, capped at MAX_PREDICT_RATIO (20%) of the data
	        size, and then raised to at least MIN_N_PREDICT (4).

	        By default, try only predict 1 seasonal cycle.
	        The minimum is applied last, so on very small data it takes
	        precedence over the 20% cap.
	        '''
	        max_by_size = int(len(self.data) * self.MAX_PREDICT_RATIO)
	        n_predict = min(self.MAX_N_PREDICT, self.period, max_by_size)

	        self.n_predict = max(n_predict, self.MIN_N_PREDICT)

	    def init_kwargs(self):
	        '''
	        kwargs will be used for initializing models.
	        kwargs contains all necessary information about the data
	        '''
	        self.kwargs['period'] = self.period

	    def train_test_split(self):
	        '''
	        n_predict length of y value will be split out for testing
	        although, each model will probably have its own cross validator.

	        exog is trimmed by the same number of rows at the end.
	        '''
	        logging.debug('Train test split')
	        self.y_test = self.y.iloc[-self.n_predict:]
	        self.y = self.y.iloc[:-self.n_predict]

	        if self.exog is not None:
	            self.exog = self.exog.iloc[:-self.n_predict]

	    def shift_exog(self):
	        '''
	        Move the exog index forward by n_predict periods and drop the first
	        n_predict values of y so both start at the same timestamp.

	        Does nothing when there is no exog data.
	        '''
	        if self.exog is None:
	            return

	        logging.debug('Shifted exog datetime index by n_predict period')
	        self.exog.index = self.exog.index.shift(
	            self.n_predict, freq=self.freq)

	        logging.debug(
	            'Trimmed y by n_predict, so it is aligned with shifted exog')
	        # The oldest n_predict y values now have no exog row to pair with
	        self.y = self.y.iloc[self.n_predict:]