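# NOTE(review): the next three lines appear to be repository-page residue
# (avatar caption, commit message, short commit hash); kept here as comments
# so the module remains valid Python -- confirm and drop if not needed.
#   howardroark's picture
#   code updates
#   019a614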
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor, XGBClassifier
from causalml.inference.tree import UpliftRandomForestClassifier
from causalml.inference.meta import BaseXRegressor, BaseRRegressor, BaseSRegressor, BaseTRegressor
from causalml.inference.meta import BaseSClassifier, BaseTClassifier, BaseXClassifier, BaseRClassifier
class ModelTraining:
    """Fit causalml ``BaseTClassifier`` / ``BaseTRegressor`` learners on a frame.

    The instance holds one DataFrame-like object, splits it into train/test
    parts, fits a classifier and/or a regressor on the training part, and
    exposes feature importances for the learner that matches ``y_name``.

    Attributes:
        df: The full dataset handed to ``train_test_split``.
        y_name: Outcome column label. ``compute_feature_importance`` only
            supports ``'conversion'`` and ``'benefit'``.
        X_names: Label(s) used to select the feature columns via
            ``df_train[X_names]`` (presumably a list of column names).
        treatment_name: Label of the column holding the treatment assignment.
        df_train, df_test: Results of ``split_data``; ``None`` until it runs.
        conversion_learner_t, benefit_learner_t: Fitted learners, or ``None``.
        conversion_learner_t_tau, benefit_learner_t_tau: Values returned by the
            corresponding learner's ``fit_predict`` (per the causalml docs,
            the estimated treatment effects), or ``None``.
    """

    # Outcome name -> (attribute holding the learner, attribute holding the
    # value returned by that learner's fit_predict).
    _LEARNER_ATTRS = {
        'conversion': ('conversion_learner_t', 'conversion_learner_t_tau'),
        'benefit': ('benefit_learner_t', 'benefit_learner_t_tau'),
    }

    def __init__(self, df, y_name, X_names, treatment_name='treatment_group_key'):
        """Store the dataset and column selections; nothing is fitted yet.

        Args:
            df: Dataset to split and train on.
            y_name: Outcome column label.
            X_names: Feature column label(s).
            treatment_name: Treatment column label. Defaults to
                ``'treatment_group_key'``, the column the class always used.
        """
        self.df = df
        self.y_name = y_name
        self.X_names = X_names
        self.treatment_name = treatment_name
        self.df_train = None
        self.df_test = None
        self.conversion_learner_t = None
        self.benefit_learner_t = None
        self.conversion_learner_t_tau = None
        self.benefit_learner_t_tau = None

    def split_data(self, test_size, random_state):
        """Split ``self.df`` into ``self.df_train`` and ``self.df_test``.

        Args:
            test_size: Forwarded to ``train_test_split``.
            random_state: Forwarded to ``train_test_split``.
        """
        self.df_train, self.df_test = train_test_split(
            self.df,
            test_size=test_size,
            random_state=random_state,
        )

    def fit_predict_classifier(self, params, control_name):
        """Fit a ``BaseTClassifier`` wrapping ``XGBClassifier`` on the training data.

        Args:
            params: Keyword arguments unpacked into ``XGBClassifier``.
            control_name: Forwarded to ``BaseTClassifier`` as ``control_name``.

        Returns:
            The value returned by the learner's ``fit_predict``; it is also
            stored in ``self.conversion_learner_t_tau``.

        Raises:
            ValueError: If ``split_data`` has not been called yet.
        """
        # Validate first so a missing split fails fast, before any model
        # object is constructed.
        self._require_training_data()
        learner = BaseTClassifier(XGBClassifier(**params), control_name=control_name)
        tau = self._fit_predict(learner)
        # Commit only after a successful fit so the learner and its tau
        # always belong together.
        self.conversion_learner_t = learner
        self.conversion_learner_t_tau = tau
        return tau

    def fit_predict_regressor(self, params, control_name):
        """Fit a ``BaseTRegressor`` wrapping ``XGBRegressor`` on the training data.

        Args:
            params: Keyword arguments unpacked into ``XGBRegressor``.
            control_name: Forwarded to ``BaseTRegressor`` as ``control_name``.

        Returns:
            The value returned by the learner's ``fit_predict``; it is also
            stored in ``self.benefit_learner_t_tau``.

        Raises:
            ValueError: If ``split_data`` has not been called yet.
        """
        self._require_training_data()
        learner = BaseTRegressor(XGBRegressor(**params), control_name=control_name)
        tau = self._fit_predict(learner)
        self.benefit_learner_t = learner
        self.benefit_learner_t_tau = tau
        return tau

    def compute_feature_importance(self):
        """Return feature importances from the learner matching ``self.y_name``.

        Calls the learner's ``get_importance`` with the training feature
        columns, the stored tau, ``normalize=True`` and ``method='auto'``.

        Returns:
            Whatever the learner's ``get_importance`` returns.

        Raises:
            ValueError: If ``self.y_name`` is not a supported outcome, or if
                the matching learner has not been fitted yet.
        """
        try:
            learner_attr, tau_attr = self._LEARNER_ATTRS[self.y_name]
        except (KeyError, TypeError):
            # Previously this case fell through and returned None silently.
            supported = ', '.join(repr(name) for name in self._LEARNER_ATTRS)
            raise ValueError(
                f"Unsupported y_name {self.y_name!r}; "
                f"feature importances are available for: {supported}."
            ) from None

        learner = getattr(self, learner_attr)
        if learner is None:
            raise ValueError("Model must be fitted before computing feature importances.")
        return learner.get_importance(
            X=self.df_train[self.X_names],
            tau=getattr(self, tau_attr),
            features=self.X_names,
            normalize=True,
            method='auto',
        )

    def _require_training_data(self):
        """Raise ``ValueError`` unless ``split_data`` has populated ``df_train``."""
        if self.df_train is None:
            raise ValueError(
                "Training data is not available; call split_data() before fitting a model."
            )

    def _fit_predict(self, learner):
        """Run ``learner.fit_predict`` on the training split and tag feature names.

        Returns the value produced by ``fit_predict``.
        """
        train = self.df_train
        tau = learner.fit_predict(
            X=train[self.X_names].values,
            treatment=train[self.treatment_name].values,
            y=train[self.y_name].values,
        )
        learner.feature_names = self.X_names
        return tau