Spaces:
Sleeping
Sleeping
from sklearn.model_selection import train_test_split | |
from xgboost import XGBRegressor, XGBClassifier | |
from causalml.inference.tree import UpliftRandomForestClassifier | |
from causalml.inference.meta import BaseXRegressor, BaseRRegressor, BaseSRegressor, BaseTRegressor | |
from causalml.inference.meta import BaseSClassifier, BaseTClassifier, BaseXClassifier, BaseRClassifier | |
class ModelTraining:
    """Fit CausalML ``BaseT*`` learners backed by XGBoost on a dataframe.

    The instance keeps two independent learner slots so that a classifier
    (for a ``'conversion'`` outcome) and a regressor (for a ``'benefit'``
    outcome) can coexist:

    * ``conversion_learner_t`` / ``conversion_learner_t_tau``
    * ``benefit_learner_t`` / ``benefit_learner_t_tau``

    Each ``*_tau`` attribute holds whatever the learner's ``fit_predict``
    returned for the training split.

    Args:
        df: Full dataset; indexed by column name and passed to
            ``train_test_split``.
        y_name: Name of the outcome column. ``compute_feature_importance``
            supports ``'conversion'`` and ``'benefit'``.
        X_names: List of feature column names.
        treatment_name: Name of the column holding the treatment group
            label. Defaults to ``'treatment_group_key'``.
    """

    def __init__(self, df, y_name, X_names, treatment_name='treatment_group_key'):
        self.df = df
        self.y_name = y_name
        self.X_names = X_names
        self.treatment_name = treatment_name
        # Populated by split_data().
        self.df_train = None
        self.df_test = None
        # Populated by fit_predict_classifier() / fit_predict_regressor().
        self.conversion_learner_t = None
        self.benefit_learner_t = None
        self.conversion_learner_t_tau = None
        self.benefit_learner_t_tau = None

    def split_data(self, test_size, random_state):
        """Split ``self.df`` into ``self.df_train`` and ``self.df_test``.

        Args:
            test_size: Forwarded to ``train_test_split`` as ``test_size``.
            random_state: Forwarded to ``train_test_split`` as
                ``random_state``.
        """
        self.df_train, self.df_test = train_test_split(
            self.df,
            test_size=test_size,
            random_state=random_state,
        )

    def _fit_predict(self, learner):
        """Fit ``learner`` on the training split and return its estimates.

        Also records ``self.X_names`` on the learner as ``feature_names``.

        Raises:
            ValueError: If ``split_data`` has not been called yet.
        """
        if self.df_train is None:
            raise ValueError("Data must be split with split_data() before fitting a model.")
        tau = learner.fit_predict(
            X=self.df_train[self.X_names].values,
            treatment=self.df_train[self.treatment_name].values,
            y=self.df_train[self.y_name].values,
        )
        learner.feature_names = self.X_names
        return tau

    def fit_predict_classifier(self, params, control_name):
        """Fit a ``BaseTClassifier(XGBClassifier(**params))`` on the training split.

        Args:
            params: Keyword arguments for ``XGBClassifier``.
            control_name: Label of the control group, forwarded to
                ``BaseTClassifier``.

        Returns:
            The result of ``fit_predict`` on the training split (also stored
            in ``self.conversion_learner_t_tau``).

        Raises:
            ValueError: If ``split_data`` has not been called yet.
        """
        learner = BaseTClassifier(XGBClassifier(**params), control_name=control_name)
        tau = self._fit_predict(learner)
        # Store only after a successful fit so a failed fit never leaves an
        # unfitted learner that passes the "is fitted" check later on.
        self.conversion_learner_t = learner
        self.conversion_learner_t_tau = tau
        return tau

    def fit_predict_regressor(self, params, control_name):
        """Fit a ``BaseTRegressor(XGBRegressor(**params))`` on the training split.

        Args:
            params: Keyword arguments for ``XGBRegressor``.
            control_name: Label of the control group, forwarded to
                ``BaseTRegressor``.

        Returns:
            The result of ``fit_predict`` on the training split (also stored
            in ``self.benefit_learner_t_tau``).

        Raises:
            ValueError: If ``split_data`` has not been called yet.
        """
        learner = BaseTRegressor(XGBRegressor(**params), control_name=control_name)
        tau = self._fit_predict(learner)
        # Same store-after-success rule as the classifier.
        self.benefit_learner_t = learner
        self.benefit_learner_t_tau = tau
        return tau

    def compute_feature_importance(self):
        """Return feature importances from the learner matching ``y_name``.

        ``'conversion'`` uses the classifier slot and ``'benefit'`` uses the
        regressor slot. The learner's ``get_importance`` is called with the
        training features, the stored estimates, ``normalize=True`` and
        ``method='auto'``.

        Returns:
            Whatever the learner's ``get_importance`` returns.

        Raises:
            ValueError: If ``y_name`` is neither ``'conversion'`` nor
                ``'benefit'``, or if the matching learner has not been
                fitted.
        """
        if self.y_name == 'conversion':
            learner, tau = self.conversion_learner_t, self.conversion_learner_t_tau
        elif self.y_name == 'benefit':
            learner, tau = self.benefit_learner_t, self.benefit_learner_t_tau
        else:
            # Previously this case fell through and silently returned None.
            raise ValueError(
                f"Unsupported y_name {self.y_name!r}; expected 'conversion' or 'benefit'."
            )
        if learner is None:
            raise ValueError("Model must be fitted before computing feature importances.")
        return learner.get_importance(
            X=self.df_train[self.X_names],
            tau=tau,
            features=self.X_names,
            normalize=True,
            method='auto',
        )