from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor, XGBClassifier
from causalml.inference.tree import UpliftRandomForestClassifier
from causalml.inference.meta import BaseXRegressor, BaseRRegressor, BaseSRegressor, BaseTRegressor
from causalml.inference.meta import BaseSClassifier, BaseTClassifier, BaseXClassifier, BaseRClassifier


class ModelTraining:
    """Train T-learner uplift models on a train split and report feature importances.

    Two learners are tracked independently: a "conversion" learner (a
    ``BaseTClassifier`` wrapping ``XGBClassifier``) and a "benefit" learner
    (a ``BaseTRegressor`` wrapping ``XGBRegressor``). Each one is stored
    together with the estimates returned by its ``fit_predict`` call on the
    training split.

    Args:
        df: Data set indexable by column name; it must contain the outcome
            column, every feature column and the treatment column.
        y_name: Name of the outcome column. ``compute_feature_importance``
            uses it to select the learner: ``'conversion'`` selects the
            classifier, ``'benefit'`` selects the regressor.
        X_names: Names of the feature columns.
        treatment_col: Name of the column holding the treatment group of
            each row. Defaults to ``'treatment_group_key'``.
    """

    def __init__(self, df, y_name, X_names, *, treatment_col='treatment_group_key'):
        self.df = df
        self.y_name = y_name
        self.X_names = X_names
        self.treatment_col = treatment_col

        # Populated by split_data().
        self.df_train = None
        self.df_test = None

        # Populated by fit_predict_classifier() / fit_predict_regressor().
        self.conversion_learner_t = None
        self.benefit_learner_t = None
        self.conversion_learner_t_tau = None
        self.benefit_learner_t_tau = None

    def split_data(self, test_size, random_state):
        """Split ``self.df`` into ``self.df_train`` and ``self.df_test``.

        Args:
            test_size: Forwarded to ``train_test_split`` as ``test_size``.
            random_state: Forwarded to ``train_test_split`` as
                ``random_state``.
        """
        self.df_train, self.df_test = train_test_split(
            self.df, test_size=test_size, random_state=random_state
        )

    def _fit_predict(self, learner):
        """Fit ``learner`` on the training split and return its ``fit_predict`` output.

        Also records the feature names on the learner via its
        ``feature_names`` attribute.

        Raises:
            ValueError: If ``split_data`` has not been called yet.
        """
        if self.df_train is None:
            raise ValueError("split_data must be called before fitting a model.")

        tau = learner.fit_predict(
            X=self.df_train[self.X_names].values,
            treatment=self.df_train[self.treatment_col].values,
            y=self.df_train[self.y_name].values,
        )
        learner.feature_names = self.X_names
        return tau

    def fit_predict_classifier(self, params, control_name):
        """Fit the conversion T-learner classifier on the training split.

        Args:
            params: Keyword arguments used to build the ``XGBClassifier``.
            control_name: Forwarded to ``BaseTClassifier`` as
                ``control_name``.

        Returns:
            The estimates returned by the learner's ``fit_predict``; they are
            also stored in ``self.conversion_learner_t_tau``.

        Raises:
            ValueError: If ``split_data`` has not been called yet.
        """
        learner = BaseTClassifier(XGBClassifier(**params), control_name=control_name)
        tau = self._fit_predict(learner)
        # Store only after a successful fit so a failed fit never leaves a
        # half-initialised learner that looks fitted.
        self.conversion_learner_t = learner
        self.conversion_learner_t_tau = tau
        return tau

    def fit_predict_regressor(self, params, control_name):
        """Fit the benefit T-learner regressor on the training split.

        Args:
            params: Keyword arguments used to build the ``XGBRegressor``.
            control_name: Forwarded to ``BaseTRegressor`` as
                ``control_name``.

        Returns:
            The estimates returned by the learner's ``fit_predict``; they are
            also stored in ``self.benefit_learner_t_tau``.

        Raises:
            ValueError: If ``split_data`` has not been called yet.
        """
        learner = BaseTRegressor(XGBRegressor(**params), control_name=control_name)
        tau = self._fit_predict(learner)
        # Store only after a successful fit (see fit_predict_classifier).
        self.benefit_learner_t = learner
        self.benefit_learner_t_tau = tau
        return tau

    def compute_feature_importance(self):
        """Return normalized feature importances from the learner matching ``y_name``.

        ``y_name == 'conversion'`` selects the classifier and
        ``y_name == 'benefit'`` selects the regressor.

        Returns:
            The result of the selected learner's ``get_importance`` call,
            computed on the training features with ``normalize=True`` and
            ``method='auto'``.

        Raises:
            ValueError: If ``y_name`` is neither ``'conversion'`` nor
                ``'benefit'``, or if the selected learner has not been
                fitted yet.
        """
        if self.y_name == 'conversion':
            learner, tau = self.conversion_learner_t, self.conversion_learner_t_tau
        elif self.y_name == 'benefit':
            learner, tau = self.benefit_learner_t, self.benefit_learner_t_tau
        else:
            # Previously this case fell through and silently returned None.
            raise ValueError(
                f"Unsupported y_name {self.y_name!r}; "
                "expected 'conversion' or 'benefit'."
            )

        if learner is None:
            raise ValueError("Model must be fitted before computing feature importances.")

        return learner.get_importance(
            X=self.df_train[self.X_names],
            tau=tau,
            features=self.X_names,
            normalize=True,
            method='auto',
        )