File size: 3,733 Bytes
6f4f21f
 
 
 
 
 
 
 
 
 
 
 
 
 
019a614
6f4f21f
 
 
019a614
 
 
6f4f21f
 
 
 
 
 
 
 
019a614
 
 
 
 
 
 
 
6f4f21f
 
019a614
 
6f4f21f
 
 
 
019a614
 
 
 
 
 
 
 
 
 
 
6f4f21f
 
019a614
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6f4f21f
019a614
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor, XGBClassifier

from causalml.inference.tree import UpliftRandomForestClassifier
from causalml.inference.meta import BaseXRegressor, BaseRRegressor, BaseSRegressor, BaseTRegressor
from causalml.inference.meta import BaseSClassifier, BaseTClassifier, BaseXClassifier, BaseRClassifier

class ModelTraining:
    """Train uplift T-learner models and compute their feature importances.

    Two targets are supported, selected by ``y_name``:

    * ``'conversion'`` -- a T-learner classifier (``XGBClassifier`` base learner)
    * ``'benefit'``    -- a T-learner regressor  (``XGBRegressor`` base learner)

    Typical usage: call ``split_data`` first, then the matching
    ``fit_predict_*`` method, then ``compute_feature_importance``.
    """

    # Column of ``df`` holding each row's treatment-group label.
    _TREATMENT_COL = 'treatment_group_key'

    def __init__(self, df, y_name, X_names):
        """
        Args:
            df: full dataset; must contain the ``y_name`` column, every column
                in ``X_names``, and a ``treatment_group_key`` column.
            y_name: target column name ('conversion' or 'benefit').
            X_names: list of feature column names.
        """
        self.df = df
        self.y_name = y_name
        self.X_names = X_names
        self.df_train = None
        self.df_test = None

        # Fitted learners and their predicted treatment effects (tau),
        # one pair per supported target.
        self.conversion_learner_t = None
        self.benefit_learner_t = None

        self.conversion_learner_t_tau = None
        self.benefit_learner_t_tau = None

    def split_data(self, test_size, random_state):
        """Split ``self.df`` into train/test subsets stored on the instance.

        Args:
            test_size: fraction (or absolute count) of rows for the test split.
            random_state: seed for a reproducible split.
        """
        self.df_train, self.df_test = train_test_split(
            self.df,
            test_size=test_size,
            random_state=random_state,
        )

    def _fit_predict(self, learner):
        """Fit ``learner`` on the training split and return its predicted tau.

        Shared by the classifier and regressor paths (previously duplicated).

        Raises:
            ValueError: if ``split_data`` has not been called yet.
        """
        if self.df_train is None:
            raise ValueError("split_data must be called before fitting a model.")
        tau = learner.fit_predict(
            X=self.df_train[self.X_names].values,
            treatment=self.df_train[self._TREATMENT_COL].values,
            y=self.df_train[self.y_name].values,
        )
        # Stored so get_importance can label features later.
        learner.feature_names = self.X_names
        return tau

    def fit_predict_classifier(self, params, control_name):
        """Fit a T-learner classifier (conversion target) and return tau.

        Args:
            params: keyword arguments forwarded to ``XGBClassifier``.
            control_name: label of the control group in the treatment column.
        """
        self.conversion_learner_t = BaseTClassifier(
            XGBClassifier(**params), control_name=control_name
        )
        self.conversion_learner_t_tau = self._fit_predict(self.conversion_learner_t)
        return self.conversion_learner_t_tau

    def fit_predict_regressor(self, params, control_name):
        """Fit a T-learner regressor (benefit target) and return tau.

        Args:
            params: keyword arguments forwarded to ``XGBRegressor``.
            control_name: label of the control group in the treatment column.
        """
        self.benefit_learner_t = BaseTRegressor(
            XGBRegressor(**params), control_name=control_name
        )
        self.benefit_learner_t_tau = self._fit_predict(self.benefit_learner_t)
        return self.benefit_learner_t_tau

    def compute_feature_importance(self):
        """Return feature importances for the fitted learner matching ``y_name``.

        Raises:
            ValueError: if no model has been fitted for ``y_name``, or if
                ``y_name`` is not 'conversion' or 'benefit' (the previous
                implementation silently returned ``None`` in that case).
        """
        if self.y_name == 'conversion':
            learner, tau = self.conversion_learner_t, self.conversion_learner_t_tau
        elif self.y_name == 'benefit':
            learner, tau = self.benefit_learner_t, self.benefit_learner_t_tau
        else:
            # Bug fix: unknown targets used to fall through and return None.
            raise ValueError(f"Unsupported y_name: {self.y_name!r}")

        if learner is None:
            raise ValueError("Model must be fitted before computing feature importances.")

        return learner.get_importance(
            X=self.df_train[self.X_names],
            tau=tau,
            features=self.X_names,
            normalize=True,
            method='auto',
        )