# tab/forecast.py
# (Provenance from the hosting page, preserved as comments so the file parses:
#  uploaded by GhoulMac — "Upload 8 files", commit d47fa16, 4.17 kB.)
#!/usr/bin/env python
# coding: utf-8
# Air-compressor time-series forecasting with tsai (PatchTST).
# Exported from a Jupyter notebook; runs top-to-bottom as a flat script.
import sklearn
from tsai.basics import *
#my_setup(sklearn)
import pandas as pd
# NOTE(review): hard-coded local Windows path — parameterise before sharing.
df=pd.read_csv('D:/project/aircompressordata.csv')
# Drop the identifier columns and the ideal-specific-power reference column
# (presumably not useful as forecasting features — confirm with data owner).
df=df.drop(['data_id','ideal_specific_power_kw_100cfm','device_id'],axis=1)
print(df.columns)
df.head(5)
# In[3]:
# --- Preprocessing pipeline -------------------------------------------------
# Regularise the raw series: shrink dtypes, drop duplicate timestamps, insert
# any missing hourly timestamps, then fill gaps (ffill, with value=0 as the
# fallback for leading NaNs that forward-fill cannot reach).
datetime_col="timestamp"
freq='1H'  # NOTE(review): pandas >= 2.2 deprecates 'H' in favour of 'h' — confirm pandas version.
columns=df.columns[:7]  # fixed typo: was 'coloumns'
method='ffill'
value=0
preproc_pipe=sklearn.pipeline.Pipeline([
('shrinker',TSShrinkDataFrame()),
('drop_duplicates',TSDropDuplicates(datetime_col=datetime_col)),
('add_mts',TSAddMissingTimestamps(datetime_col=datetime_col,freq=freq)),
('fill_missing',TSFillMissing(columns=columns,method=method,value=value)),
],
verbose=True)
mkdir('data', exist_ok=True,parents=True)
df=preproc_pipe.fit_transform(df)
# Save the *fitted* pipeline so the artefact on disk matches what was applied
# to the data (the original saved it before fitting, persisting an unfitted copy).
save_object(preproc_pipe,'data/preproc_pipe.pkl')
preproc_pipe=load_object('data/preproc_pipe.pkl')
# In[4]:
df.head()
# In[5]:
# Sliding-window sizes: 200 hourly steps of history to predict the next 72
# steps (3 days at freq='1H'). 10% of windows for validation, 20% for test.
fcst_history=200
fcst_horizon=72
valid_size=0.1
test_size=0.2
# Chronological train/valid/test index splits over the preprocessed frame.
splits=get_forecasting_splits(df,fcst_history=fcst_history,fcst_horizon=fcst_horizon,datetime_col=datetime_col,
valid_size=valid_size,test_size=test_size)
splits
# In[6]:
# --- Scaling pipeline -------------------------------------------------------
# Standard-scale every feature column (all except the timestamp), fitting the
# scaler on the training split only to avoid leaking validation/test statistics.
columns=df.columns[1:]  # fixed typo: was 'coloumns'
train_split=splits[0]
exp_pipe=sklearn.pipeline.Pipeline([
('scaler',TSStandardScaler(columns=columns)),
],
verbose=True)
df_scaled=exp_pipe.fit_transform(df,scaler__idxs=train_split)
# Save the *fitted* scaler so the on-disk artefact can reproduce the transform
# (the original saved it before fitting, persisting an unfitted copy).
save_object(exp_pipe,'data/exp_pipe.pkl')
exp_pipe=load_object('data/exp_pipe.pkl')
df_scaled
# In[7]:
# Input (x) and target (y) variables: every column except the timestamp —
# a multivariate forecast in which all features are also predicted.
x_vars=df.columns[1:]
y_vars=df.columns[1:]
# In[8]:
# BUG FIX: windows must be built from the *scaled* frame (df_scaled). The
# original sliced the unscaled df, so the model trained on unscaled values
# while inference (learn.transform / inverse_transform) applied the scaler.
X,y=prepare_forecasting_data(df_scaled,fcst_history=fcst_history,fcst_horizon=fcst_horizon,x_vars=x_vars,y_vars=y_vars)
X.shape , y.shape
# In[9]:
# PatchTST architecture hyperparameters, passed straight through to the
# model constructor via TSForecaster(arch_config=...).
arch_config = {
    "n_layers": 3,
    "n_heads": 4,
    "d_model": 16,
    "d_ff": 128,
    "attn_dropout": 0.0,
    "dropout": 0.3,
    "patch_len": 24,
    "stride": 2,
    "padding_patch": True,
}
# In[10]:
# Build the forecaster: PatchTST over the prepared windows. The preprocessing
# and scaling pipelines are attached so they are exported with the learner
# and can be re-applied at inference time.
learn=TSForecaster(X,y,splits=splits,
batch_size=16,path="models",
pipelines=[preproc_pipe,exp_pipe],
arch="PatchTST",
arch_config=arch_config,
metrics=[mse,mae],
cbs=ShowGraph())
# In[11]:
learn.summary()
# In[12]:
# Pick a learning rate with the LR finder's 'valley' heuristic.
lr_max=learn.lr_find().valley
# In[ ]:
n_epochs=100
learn.fit_one_cycle(n_epoch=n_epochs,lr_max=lr_max)
# Export the trained learner (weights + attached pipelines) to models/PatchTST.pt.
learn.export('PatchTST.pt')
# In[14]:
from tsai.inference import load_learner
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
# Reload the exported learner and score it on the held-out test windows
# (splits[2] = test indices).
learn=load_learner('models/PatchTST.pt')
y_test_preds, *_=learn.get_X_preds(X[splits[2]])
y_test_preds=to_np(y_test_preds)
print(y_test_preds.shape)
y_test=y[splits[2]]
# Flattening pools every variable and horizon step together, so these are
# aggregate errors, not per-variable scores.
print(mean_squared_error(y_test.flatten(),y_test_preds.flatten()))
print(mean_absolute_error(y_test.flatten(),y_test_preds.flatten()))
print(mean_absolute_percentage_error(y_test.flatten(),y_test_preds.flatten()))
# In[15]:
X_test=X[splits[2]]
# Visual check of forecasts vs. actuals for selected variables.
plot_forecast(X_test,y_test,y_test_preds,sel_vars=True)
# In[16]:
# Build the inference window: the fcst_history hourly timestamps ending at
# (and including) fcst_date.
fcst_date="2023-07-31 23:00:00"
dates=pd.date_range(start=None,end=fcst_date,periods=fcst_history,freq=freq)
dates
# In[17]:
#df=pd.read_csv('D:/project/aircompressordata.csv')
#df=preproc_pipe.fit_transform(df)
# Select the rows matching those timestamps.
# NOTE(review): if any timestamp is absent from df, new_df will be shorter
# than fcst_history — confirm the window is complete before forecasting.
new_df=df[df[datetime_col].isin(dates)].reset_index(drop=True)
new_df
# In[18]:
from tsai.inference import load_learner
# Reload the exported learner; learn.transform applies the attached
# preprocessing/scaling pipelines to the raw inference window.
predict=load_learner('models/PatchTST.pt')
new_df=predict.transform(new_df)
new_df
# In[19]:
# Build a single input window (fcst_horizon=0: no targets exist at inference).
# Removed the unused 'x_feat' variable and use the conventional '_' throwaway.
new_x,_=prepare_forecasting_data(new_df,fcst_history=fcst_history,fcst_horizon=0,x_vars=x_vars,y_vars=y_vars)
new_x.shape
# In[20]:
# Forecast the next fcst_horizon steps, reshape predictions from
# (samples, vars, steps) to (steps, vars), and attach future timestamps.
new_scaled_preds, *_ = learn.get_X_preds(new_x)
new_scaled_preds=to_np(new_scaled_preds).swapaxes(1,2).reshape(-1,len(y_vars))
# Consistency fix: use the shared `freq` variable instead of re-hard-coding
# '1H'. Range starts at fcst_date; drop it to keep only the future steps.
dates=pd.date_range(start=fcst_date, periods=fcst_horizon+1,freq=freq)[1:]
preds_df=pd.DataFrame(dates,columns=[datetime_col])
preds_df.loc[:, y_vars]=new_scaled_preds
# Map predictions back to the original scale via the attached pipelines.
preds_df=learn.inverse_transform(preds_df)
preds_df
# In[1]:
# tsai notebook-to-script helpers: detect the notebook name and write .py
# scripts from it (presumably only meaningful when run inside a notebook
# context — confirm before keeping in the exported script).
from tsai.export import get_nb_name; nb_name=get_nb_name(locals())
from tsai.imports import create_scripts; create_scripts(nb_name)
# In[ ]: