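# File: dna-casestudy/code/future_features.py
# Hugging Face file-viewer metadata captured with the source (not part of the program):
#   avatar caption: davidna22
#   commit: dad00c5 (verified) - "Upload folder using huggingface_hub"
#   viewer links: raw / history / blame
#   size: 1.17 kB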
# Derive lag, rolling and expanding-window features for every future covariate.
#
# Reads (both defined earlier in the file):
#   FEATURES["future_covariates"] - names of the source columns in data_preprocess.
#   data_preprocess               - frame holding those columns. The '14D' offset
#                                   window requires a datetime-like index.
# Writes:
#   FEATURES["future_covariates_final"] - names of every generated column.
#   data_preprocess                     - rebound to a frame with the generated
#                                         columns appended after the existing ones.
#
# Suffix -> transformation applied to one source column. Keeping the column
# names and their computations in a single table means the name list, the
# generated columns and the final sanity check cannot drift apart.
future_feature_builders = {
    # Lag features. shift() counts rows, so the "D" in the suffix presumably
    # relies on one row per day - TODO confirm against the index frequency.
    "_L0D": lambda s: s.shift(0),
    "_L1D": lambda s: s.shift(1),
    "_L2D": lambda s: s.shift(2),
    # Rolling features over a trailing 14-day window (no shift needed for
    # future vars).
    "_RMean14D": lambda s: s.rolling('14D').mean(),
    "_RMin14D": lambda s: s.rolling('14D').min(),
    # Expanding-window features (no shift needed for future vars). The window
    # itself is unbounded; 14 is min_periods, i.e. the number of observations
    # required before a value is produced.
    "_EMean14D": lambda s: s.expanding(min_periods=14).mean(),
    "_EMin14D": lambda s: s.expanding(min_periods=14).min(),
}

FEATURES["future_covariates_final"] = []
new_feature_columns = []
for col in FEATURES["future_covariates"]:
    source = data_preprocess[col]
    for suffix, build in future_feature_builders.items():
        feature_name = col + suffix
        new_feature_columns.append(build(source).rename(feature_name))
        FEATURES["future_covariates_final"].append(feature_name)

# Append everything with one concat. Concatenating inside the loop would copy
# the whole (growing) frame once per covariate.
data_preprocess = pd.concat([data_preprocess, *new_feature_columns], axis=1)

# Sanity check: selecting the generated names must yield exactly one column per
# (covariate, suffix) pair. A larger count means duplicate column names, e.g.
# because this section was executed twice on the same frame.
expected_feature_count = len(FEATURES["future_covariates"]) * len(future_feature_builders)
assert len(data_preprocess.loc[:, FEATURES["future_covariates_final"]].columns) == expected_feature_count, (
    "unexpected number of future-covariate feature columns "
    "(duplicate column names in data_preprocess?)"
)