nevoit committed
Commit
76cdfb8
1 Parent(s): 978914a

Upload 40 files

Files changed (40)
  1. datasets/adult.arff +0 -0
  2. datasets/bank-full.arff +0 -0
  3. figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.001.png +0 -0
  4. figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.002.png +0 -0
  5. figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.003.png +0 -0
  6. figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.004.png +0 -0
  7. figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.005.png +0 -0
  8. figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.006.png +0 -0
  9. figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.007.png +0 -0
  10. figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.008.png +0 -0
  11. figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.009.png +0 -0
  12. figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.010.png +0 -0
  13. figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.011.png +0 -0
  14. figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.012.png +0 -0
  15. figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.013.png +0 -0
  16. figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.014.png +0 -0
  17. figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.015.png +0 -0
  18. figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.016.png +0 -0
  19. figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.017.png +0 -0
  20. figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.018.png +0 -0
  21. figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.019.png +0 -0
  22. figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.020.png +0 -0
  23. figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.021.png +0 -0
  24. figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.022.png +0 -0
  25. figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.023.png +0 -0
  26. figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.024.png +0 -0
  27. figures/adults_fooled.png +0 -0
  28. figures/adults_not_fooled.png +0 -0
  29. figures/bank_full_fooled.png +0 -0
  30. figures/bank_full_not_fooled.png +0 -0
  31. figures/data_adult_ep_10_bs_128_lr_0.001_al_0.5_dr_0.5_losses.png +0 -0
  32. figures/data_adult_ep_10_bs_128_lr_0.001_al_0.5_dr_0.5_pca.png +0 -0
  33. figures/data_bank-full_ep_10_bs_128_lr_0.001_al_0.2_dr_0.3_losses.png +0 -0
  34. figures/data_bank-full_ep_10_bs_128_lr_0.001_al_0.2_dr_0.3_pca.png +0 -0
  35. input_data/adult.arff +0 -0
  36. input_data/bank-full.arff +0 -0
  37. nt_exp.py +203 -0
  38. nt_gan.py +333 -0
  39. nt_gg.py +282 -0
  40. outputs/empty +1 -0
datasets/adult.arff ADDED
The diff for this file is too large to render. See raw diff
 
datasets/bank-full.arff ADDED
The diff for this file is too large to render. See raw diff
 
figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.001.png ADDED
figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.002.png ADDED
figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.003.png ADDED
figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.004.png ADDED
figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.005.png ADDED
figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.006.png ADDED
figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.007.png ADDED
figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.008.png ADDED
figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.009.png ADDED
figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.010.png ADDED
figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.011.png ADDED
figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.012.png ADDED
figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.013.png ADDED
figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.014.png ADDED
figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.015.png ADDED
figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.016.png ADDED
figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.017.png ADDED
figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.018.png ADDED
figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.019.png ADDED
figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.020.png ADDED
figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.021.png ADDED
figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.022.png ADDED
figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.023.png ADDED
figures/Aspose.Words.36be2542-1776-4b1c-8010-360ae82480ae.024.png ADDED
figures/adults_fooled.png ADDED
figures/adults_not_fooled.png ADDED
figures/bank_full_fooled.png ADDED
figures/bank_full_not_fooled.png ADDED
figures/data_adult_ep_10_bs_128_lr_0.001_al_0.5_dr_0.5_losses.png ADDED
figures/data_adult_ep_10_bs_128_lr_0.001_al_0.5_dr_0.5_pca.png ADDED
figures/data_bank-full_ep_10_bs_128_lr_0.001_al_0.2_dr_0.3_losses.png ADDED
figures/data_bank-full_ep_10_bs_128_lr_0.001_al_0.2_dr_0.3_pca.png ADDED
input_data/adult.arff ADDED
The diff for this file is too large to render. See raw diff
 
input_data/bank-full.arff ADDED
The diff for this file is too large to render. See raw diff
 
nt_exp.py ADDED
@@ -0,0 +1,203 @@
import os

import numpy as np
import pandas as pd
from scipy.io import arff
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

from nt_gan import GAN
from nt_gg import GG

dataset_directory = 'datasets'
saved_models_path = 'outputs'


def prepare_architecture(arff_data_path):
    """
    This function creates the architecture of the GAN network.
    The generator and the discriminator are created and then combined into the GAN model.
    :param arff_data_path: path to the ARFF data file
    :return: the variables that are relevant for the next stages
    """
    data, meta_data = arff.loadarff(arff_data_path)  # reads the ARFF file into a tuple of data and its metadata
    df = pd.DataFrame(data)
    columns = df.columns
    transformed_data, x, x_scaled, meta_data_rev, min_max_scaler = create_scaled_data(df, meta_data)

    number_of_features = len(transformed_data.columns)  # defines the GAN and training parameters

    return x_scaled, meta_data_rev, columns, min_max_scaler, number_of_features


def create_scaled_data(df, meta_data):
    """
    Encodes the categorical columns as integers and scales all the features to [0, 1].
    :param df: the raw dataframe loaded from the ARFF file
    :param meta_data: the ARFF metadata that describes each attribute
    :return: the encoded dataframe, the raw and scaled arrays, the reverse mapping, and the fitted scaler
    """
    # The encoding starts from one and not zero because zero is reserved for NaN values
    meta_data_dict = {k: {a.replace(' ', ''): b + 1 for b, a in enumerate(v.values)} for k, v in
                      meta_data._attributes.items() if
                      v.type_name != 'numeric'}
    meta_data_rev = {k: {b + 1: a.replace(' ', '') for b, a in enumerate(v.values)} for k, v in
                     meta_data._attributes.items() if
                     v.type_name != 'numeric'}
    transformed_data = df.copy()
    for col in df.columns:
        if col in meta_data_dict:
            # Sometimes a value cannot be found in the metadata, so we treat it as NaN (encoded as zero)
            transformed_data[col] = transformed_data[col].apply(
                lambda x: meta_data_dict[col][str(x).split('\'')[1]] if str(x).split('\'')[1] in meta_data_dict[
                    col] else 0)
    x = transformed_data.values  # returns a numpy array
    min_max_scaler = preprocessing.MinMaxScaler()
    x_scaled = min_max_scaler.fit_transform(x)
    return transformed_data, x, x_scaled, meta_data_rev, min_max_scaler


def re_scaled_data(data, columns, meta_data_rev, min_max_scaler):
    """
    This function re-scales the fake data back to the original format.
    :param data: the data we want to re-scale
    :param columns: the original column names
    :param meta_data_rev: the reverse mapping from integer codes back to category names
    :param min_max_scaler: the scaler that was fitted on the original data
    :return: a dataframe in the original format
    """
    data_inv = min_max_scaler.inverse_transform(data)
    df = pd.DataFrame(data_inv, columns=columns)
    transformed_data = df.copy()
    for col in transformed_data.columns:
        if col in meta_data_rev:
            # Sometimes a value cannot be found in the metadata, so we treat it as NaN
            transformed_data[col] = transformed_data[col].apply(
                lambda x: meta_data_rev[col][int(round(x))] if int(round(x)) in meta_data_rev[
                    col] else np.nan)
    return transformed_data


def first_question():
    """
    This function answers the first question: a hyperparameter search over the GAN.
    :return:
    """
    to_plot_losses = True
    results_output = os.path.join(saved_models_path, 'question_one_results.csv')
    results = {'dataset': [], 'lr': [], 'ep': [], 'bs': [], 'alpha': [], 'dropout': [], 'gen_loss': [], 'dis_loss': [],
               'activation': [], 'fooled_len': [], 'not_fooled_len': [], 'mean_min_distance_fooled': [],
               'mean_min_distance_not_fooled': [], 'mean_min_distance_gap': []}
    # Scoring idea: w1 * (MMDF + MMDNF) - w3 * MMDG + w2 * (NFL / 100), where MMDG = MMDNF - MMDF
    # Full hyperparameter grid (commented out to keep the run short):
    # data_name = ["adult", "bank-full"]
    # learning_rate = [0.01, 0.001, 0.0001]
    # epochs = [5, 10, 15]
    # batch_size = [64, 128, 1024]
    # alpha_relu = [0.2, 0.5]
    # dropout = [0.3, 0.5]
    data_name = ["adult"]
    learning_rate = [0.001]
    epochs = [10]
    batch_size = [128]
    alpha_relu = [0.5]
    dropout = [0.5]
    loss = 'binary_crossentropy'
    activation = 'sigmoid'

    for data in data_name:
        for lr in learning_rate:
            for ep in epochs:
                for bs in batch_size:
                    for al in alpha_relu:
                        for dr in dropout:
                            arff_data_path = f'./datasets/{data}.arff'
                            model_name = f'data_{data}_ep_{ep}_bs_{bs}_lr_{lr}_al_{al}_dr_{dr}'
                            pca_output = os.path.join(saved_models_path, f'{model_name}_pca.png')
                            fooled_output = os.path.join(saved_models_path, f'{model_name}_fooled.csv')
                            not_fooled_output = os.path.join(saved_models_path, f'{model_name}_not_fooled.csv')

                            x_scaled, meta_data_rev, columns, min_max_scaler, number_of_features = prepare_architecture(
                                arff_data_path)
                            gan_obj = GAN(number_of_features=number_of_features, saved_models_path=saved_models_path,
                                          learning_rate=lr, alpha_relu=al, dropout=dr,
                                          loss=loss, activation=activation)
                            gen_loss, dis_loss = gan_obj.train(scaled_data=x_scaled, epochs=ep, batch_size=bs,
                                                               to_plot_losses=to_plot_losses, model_name=model_name)
                            dis_fooled_scaled, dis_not_fooled_scaled, mean_min_distance_fooled, mean_min_distance_not_fooled = gan_obj.test(
                                scaled_data=x_scaled, sample_num=100, pca_output=pca_output)
                            dis_fooled = re_scaled_data(data=dis_fooled_scaled, columns=columns,
                                                        meta_data_rev=meta_data_rev,
                                                        min_max_scaler=min_max_scaler)
                            dis_fooled.to_csv(fooled_output)
                            dis_not_fooled = re_scaled_data(data=dis_not_fooled_scaled, columns=columns,
                                                            meta_data_rev=meta_data_rev,
                                                            min_max_scaler=min_max_scaler)
                            dis_not_fooled.to_csv(not_fooled_output)
                            results['dataset'].append(data)
                            results['lr'].append(lr)
                            results['ep'].append(ep)
                            results['bs'].append(bs)
                            results['alpha'].append(al)
                            results['dropout'].append(dr)
                            results['gen_loss'].append(gen_loss)
                            results['dis_loss'].append(dis_loss)
                            results['activation'].append(activation)
                            results['fooled_len'].append(len(dis_fooled_scaled))
                            results['not_fooled_len'].append(len(dis_not_fooled_scaled))
                            results['mean_min_distance_fooled'].append(mean_min_distance_fooled)
                            results['mean_min_distance_not_fooled'].append(mean_min_distance_not_fooled)
                            results['mean_min_distance_gap'].append(mean_min_distance_not_fooled - mean_min_distance_fooled)
    results_df = pd.DataFrame.from_dict(results)
    results_df.to_csv(results_output, index=False)


def second_question():
    """
    This function answers the second question: training a general generator against a black-box discriminator.
    :return:
    """
    data_name = ["adult", "bank-full"]
    learning_rate = [0.001]
    epochs = [10]
    batch_size = [128]
    alpha_relu = [0.2]
    dropout = [0.3]
    results = {'dataset': [], 'lr': [], 'ep': [], 'bs': [], 'alpha': [], 'dropout': [], 'gen_loss': [],
               'proba_error': []}
    combs = len(data_name) * len(learning_rate) * len(epochs) * len(batch_size) * len(alpha_relu) * len(dropout)
    i = 1
    for data in data_name:
        for lr in learning_rate:
            for ep in epochs:
                for bs in batch_size:
                    for al in alpha_relu:
                        for dr in dropout:
                            print(f'Running combination {i}/{combs}')
                            data_path = f'./datasets/{data}.arff'
                            model_name = f'data_{data}_ep_{ep}_bs_{bs}_lr_{lr}_part2'
                            x_scaled, meta_data_rev, cols, min_max_scaler, feature_num = prepare_architecture(data_path)
                            general_generator = GG(feature_num, saved_models_path, lr, dr, al)
                            # The last column is the class; hold out 10% of the data as a test set
                            x_train, x_test, y_train, y_test = train_test_split(x_scaled[:, :-1], x_scaled[:, -1],
                                                                                test_size=0.1)
                            general_generator.train_gg(x_train, y_train, ep, bs, model_name, data, saved_models_path,
                                                       True)
                            error = general_generator.get_error()
                            results['dataset'].append(data)
                            results['lr'].append(lr)
                            results['ep'].append(ep)
                            results['bs'].append(bs)
                            results['alpha'].append(al)
                            results['dropout'].append(dr)
                            results['gen_loss'].append(general_generator.losses['gen_loss'][-1])
                            results['proba_error'].append(error.mean())
                            i += 1
                            # Test set performance
                            general_generator.plot_discriminator_results(x_test, y_test, data, saved_models_path)
                            general_generator.plot_generator_results(data, saved_models_path)

    results_output = os.path.join(saved_models_path, 'question_two_results.csv')
    results_df = pd.DataFrame.from_dict(results)
    results_df.to_csv(results_output, index=False)


def main():
    # first_question()
    second_question()


if __name__ == '__main__':
    main()
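
For reference, a minimal standalone sketch (with made-up toy values, assuming only numpy and scikit-learn) of the MinMaxScaler round trip that create_scaled_data and re_scaled_data build on: fit_transform maps every encoded column into [0, 1], and inverse_transform maps generated samples back into the original value ranges before the integer codes are decoded to category names.

import numpy as np
from sklearn import preprocessing

# Toy encoded data: a categorical code column and a numeric column (hypothetical values)
x = np.array([[1.0, 39.0],
              [2.0, 50.0],
              [3.0, 25.0]])

scaler = preprocessing.MinMaxScaler()
x_scaled = scaler.fit_transform(x)               # every column is mapped into [0, 1]
x_restored = scaler.inverse_transform(x_scaled)  # back to the original ranges

assert np.allclose(x, x_restored)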
nt_gan.py ADDED
@@ -0,0 +1,333 @@
import os
from itertools import compress

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras.layers import Dense, Dropout, LeakyReLU
from keras.models import Sequential
from keras.optimizers import Adam
from numpy.random import randn
from sklearn.decomposition import PCA
from tqdm import tqdm


class GAN(object):
    def __init__(self, number_of_features, saved_models_path, learning_rate, alpha_relu, dropout, loss, activation):
        """
        A constructor for the GAN class
        :param number_of_features: number of features in the data
        :param saved_models_path: the output folder path
        :param learning_rate: the Adam learning rate
        :param alpha_relu: the LeakyReLU slope
        :param dropout: the dropout rate
        :param loss: the loss function name
        :param activation: the output activation name
        """
        self.saved_models_path = saved_models_path
        self.number_of_features = number_of_features

        self.generator_model = None
        self.noise_dim = None
        self.discriminator_model = None
        self.learning_rate = learning_rate
        self.gan_model = None
        self.activation = activation
        self.alpha_relu = alpha_relu
        self.loss = loss
        self.dropout = dropout

        self.build_generator()  # build the generator
        self.build_discriminator()  # build the discriminator
        self.build_gan()  # build the GAN

    def build_generator(self):
        """
        This function creates the generator model
        :return:
        """
        noise_size = int(self.number_of_features / 2)
        self.noise_dim = (noise_size,)  # size of the noise space

        self.generator_model = Sequential()
        self.generator_model.add(Dense(int(self.number_of_features * 2), input_shape=self.noise_dim))
        self.generator_model.add(LeakyReLU(alpha=self.alpha_relu))

        self.generator_model.add(Dense(int(self.number_of_features * 4)))
        self.generator_model.add(LeakyReLU(alpha=self.alpha_relu))
        self.generator_model.add(Dropout(self.dropout))

        self.generator_model.add(Dense(int(self.number_of_features * 2)))
        self.generator_model.add(LeakyReLU(alpha=self.alpha_relu))
        self.generator_model.add(Dropout(self.dropout))

        # Output layer; the generator is compiled only as part of the combined GAN model
        self.generator_model.add(Dense(self.number_of_features, activation=self.activation))
        self.generator_model.summary()

    def build_discriminator(self):
        """
        Create the discriminator model
        :return:
        """
        self.discriminator_model = Sequential()

        self.discriminator_model.add(Dense(self.number_of_features * 2, input_shape=(self.number_of_features,)))
        self.discriminator_model.add(LeakyReLU(alpha=self.alpha_relu))

        self.discriminator_model.add(Dense(self.number_of_features * 4))
        self.discriminator_model.add(LeakyReLU(alpha=self.alpha_relu))
        self.discriminator_model.add(Dropout(self.dropout))

        self.discriminator_model.add(Dense(self.number_of_features * 2))
        self.discriminator_model.add(LeakyReLU(alpha=self.alpha_relu))
        self.discriminator_model.add(Dropout(self.dropout))

        # Output layer, then compile it
        self.discriminator_model.add(Dense(1, activation=self.activation))
        optimizer = Adam(lr=self.learning_rate)
        self.discriminator_model.compile(loss=self.loss, optimizer=optimizer)
        self.discriminator_model.summary()

    def build_gan(self):
        """
        Create the GAN network
        :return: the GAN model object
        """
        self.gan_model = Sequential()
        self.discriminator_model.trainable = False

        # The following lines connect the generator and discriminator models to the GAN.
        self.gan_model.add(self.generator_model)
        self.gan_model.add(self.discriminator_model)

        # Compile it
        optimizer = Adam(lr=self.learning_rate)
        self.gan_model.compile(loss=self.loss, optimizer=optimizer)

        return self.gan_model

    def train(self, scaled_data, epochs, batch_size, to_plot_losses, model_name):
        """
        This function trains the generator and the discriminator
        :param model_name: the model's name, used for the output file names
        :param to_plot_losses: whether or not to plot the loss history
        :param scaled_data: the data after min-max scaling
        :param epochs: number of epochs
        :param batch_size: the batch size
        :return: the last generator loss and the last discriminator loss
        """
        dis_output, gen_output, prev_output = self.check_for_existed_output(model_name)
        if prev_output:
            return -1, -1

        losses_output = os.path.join(self.saved_models_path, f'{model_name}_losses.png')
        discriminator_loss = []
        generator_loss = []

        # We need to use half of the batch size for the fake data and half for the real one
        half_batch_size = int(batch_size / 2)
        iterations = int(len(scaled_data) / half_batch_size)
        iterations = iterations + 1 if len(scaled_data) % half_batch_size != 0 else iterations

        for epoch in range(1, epochs + 1):  # iterates over the epochs
            np.random.shuffle(scaled_data)
            p_bar = tqdm(range(iterations), ascii=True)
            for iteration in p_bar:
                dis_loss, gen_loss = self.train_models(batch_size=batch_size, half_batch_size=half_batch_size,
                                                       index=iteration, scaled_data=scaled_data)
                discriminator_loss.append(dis_loss)
                generator_loss.append(gen_loss)
                p_bar.set_description(
                    f"Epoch ({epoch}/{epochs}) | DISCRIMINATOR LOSS: {dis_loss:.2f} | GENERATOR LOSS: {gen_loss:.2f} |")

        # Save the weights for future use
        self.discriminator_model.save_weights(dis_output)
        self.generator_model.save_weights(gen_output)

        # Plot the losses
        if to_plot_losses:
            self.plot_losses(discriminator_loss=discriminator_loss, generator_loss=generator_loss,
                             losses_output=losses_output)

        return generator_loss[-1], discriminator_loss[-1]

    def check_for_existed_output(self, model_name) -> (str, str, bool):
        """
        This function checks for existing output and loads the saved weights if it is found
        :param model_name: the model's name
        :return: the discriminator output path, the generator output path, and whether previous output was found
        """
        prev_output = False
        dis_output = os.path.join(self.saved_models_path, f'{model_name}_dis_weights.h5')
        gen_output = os.path.join(self.saved_models_path, f'{model_name}_gen_weights.h5')
        if os.path.exists(dis_output) and os.path.exists(gen_output):
            print("The model was trained in the past")
            self.discriminator_model.load_weights(dis_output)
            self.generator_model.load_weights(gen_output)
            prev_output = True
        return dis_output, gen_output, prev_output

    def train_models(self, batch_size, half_batch_size, index, scaled_data):
        """
        This function trains the discriminator and the generator on a single batch
        :param batch_size: the batch size
        :param half_batch_size: half of the batch size
        :param index: the index of the current batch
        :param scaled_data: the scaled training data
        :return: the average discriminator loss and the generator loss
        """
        self.discriminator_model.trainable = True

        # Create a batch of real data and train the model
        x_real, y_real = self.get_real_samples(data=scaled_data, batch_size=half_batch_size, index=index)
        d_real_loss = self.discriminator_model.train_on_batch(x_real, y_real)

        # Create a batch of fake data and train the model
        x_fake, y_fake = self.create_fake_samples(batch_size=half_batch_size)
        d_fake_loss = self.discriminator_model.train_on_batch(x_fake, y_fake)

        avg_dis_loss = 0.5 * (d_real_loss + d_fake_loss)

        # Create noise for the generator model
        noise = randn(self.noise_dim[0] * batch_size).reshape((batch_size, self.noise_dim[0]))

        self.discriminator_model.trainable = False
        gen_loss = self.gan_model.train_on_batch(noise, np.ones((batch_size, 1)))

        return avg_dis_loss, gen_loss

    @staticmethod
    def get_real_samples(data, batch_size, index):
        """
        Generate batch_size real samples with class labels
        :param data: the original data
        :param batch_size: the batch size
        :param index: the index of the batch
        :return: x: real samples, y: labels
        """
        start_index = batch_size * index
        end_index = start_index + batch_size
        x = data[start_index: end_index]

        return x, np.ones((len(x), 1))

    def create_fake_samples(self, batch_size):
        """
        Use the generator to generate batch_size fake examples, with class labels
        :param batch_size: the batch size
        :return:
        """
        noise = randn(self.noise_dim[0] * batch_size).reshape((batch_size, self.noise_dim[0]))
        x = self.generator_model.predict(noise)  # create fake samples using the generator

        return x, np.zeros((len(x), 1))

    @staticmethod
    def plot_losses(discriminator_loss, generator_loss, losses_output):
        """
        Plot the training loss values
        :param generator_loss: the generator losses per iteration
        :param discriminator_loss: the discriminator losses per iteration
        :param losses_output: the output file path
        :return:
        """
        plt.clf()  # start a fresh figure so successive runs do not overlay each other
        plt.plot(discriminator_loss)
        plt.plot(generator_loss)
        plt.xlabel('Iteration')
        plt.ylabel('Loss')
        plt.title('Discriminator and Generator Losses')
        plt.legend(['Discriminator Loss', 'Generator Loss'])
        plt.savefig(losses_output)

    @staticmethod
    def return_minimum_euclidean_distance(scaled_data, x):
        """
        This function returns the minimum Euclidean distance between a record and the original data
        :param scaled_data: the original data
        :param x: a record we want to compare with
        :return: the minimum distance and the index of the closest record
        """
        s = np.power(np.power((scaled_data - np.array(x)), 2).sum(1), 0.5)
        return pd.Series([s[s.argmin()], s.argmin()])

    def test(self, scaled_data, sample_num, pca_output):
        """
        This function tests the model
        :param scaled_data: the original scaled data
        :param sample_num: the number of samples to generate
        :param pca_output: the output path of the PCA figure
        :return:
        """
        x_fake, y_fake = self.create_fake_samples(batch_size=sample_num)
        fake_pred = self.discriminator_model.predict(x_fake)

        # Split the fake samples by whether they fooled the discriminator
        dis_fooled_scaled = np.asarray(list(compress(x_fake, fake_pred > 0.5)))
        dis_not_fooled_scaled = np.asarray(list(compress(x_fake, fake_pred <= 0.5)))

        # ------------- Euclidean -------------
        mean_min_distance_fooled, mean_min_distance_not_fooled = (-1, -1)
        if len(dis_fooled_scaled) > 0 and len(dis_not_fooled_scaled) > 0:
            mean_min_distance_fooled = self.get_mean_distance_score(scaled_data, dis_fooled_scaled)
            print(f'The mean minimum distance for fooled samples is {mean_min_distance_fooled}')
            mean_min_distance_not_fooled = self.get_mean_distance_score(scaled_data, dis_not_fooled_scaled)
            print(f'The mean minimum distance for not fooled samples is {mean_min_distance_not_fooled}')
        else:
            print('Either the fooled or the not-fooled set is empty')

        # ------------- PCA --------------
        data_pca_df = self.get_pca_df(scaled_data, 'original')
        dis_fooled_pca_df = self.get_pca_df(dis_fooled_scaled, 'fooled')
        dis_not_fooled_pca_df = self.get_pca_df(dis_not_fooled_scaled, 'not fooled')
        pca_frames = [data_pca_df, dis_fooled_pca_df, dis_not_fooled_pca_df]
        pca_result = pd.concat(pca_frames)
        self.plot_pca(pca_result, pca_output)

        return dis_fooled_scaled, dis_not_fooled_scaled, mean_min_distance_fooled, mean_min_distance_not_fooled

    def get_mean_distance_score(self, scaled_data, dis_scaled):
        """
        This function returns the mean minimum distance score for the given samples
        :param scaled_data: the original data
        :param dis_scaled: the samples to score
        :return:
        """
        dis_scaled_ecu = pd.DataFrame(dis_scaled)
        dis_scaled_ecu[['min_distance', 'similar_i']] = dis_scaled_ecu.apply(
            lambda x: self.return_minimum_euclidean_distance(scaled_data, x), axis=1)
        return dis_scaled_ecu['min_distance'].mean()

    @staticmethod
    def plot_pca(pca_result, pca_output):
        """
        This function plots the PCA figure
        :param pca_result: a dataframe with all the results
        :param pca_output: the output path
        :return:
        """
        fig = plt.figure(figsize=(8, 8))
        ax = fig.add_subplot(1, 1, 1)
        ax.set_xlabel('Principal Component 1', fontsize=15)
        ax.set_ylabel('Principal Component 2', fontsize=15)
        ax.set_title('PCA With Two Components', fontsize=20)
        targets = ['original', 'fooled', 'not fooled']
        colors = ['r', 'g', 'b']
        for target, color in zip(targets, colors):
            indices_to_keep = pca_result['name'] == target
            ax.scatter(pca_result.loc[indices_to_keep, 'comp1'], pca_result.loc[indices_to_keep, 'comp2'],
                       c=color, s=50)
        ax.legend(targets)
        ax.grid()
        plt.savefig(pca_output)

    @staticmethod
    def get_pca_df(scaled_data, data_name):
        """
        This function creates the PCA dataframe
        :param scaled_data: the data to project
        :param data_name: the value to put in the 'name' column
        :return:
        """
        pca = PCA(n_components=2)
        principal_components = pca.fit_transform(scaled_data)
        principal_df = pd.DataFrame(data=principal_components, columns=['comp1', 'comp2'])
        principal_df['name'] = data_name
        return principal_df
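
As a usage note, a minimal sketch of driving the GAN class on its own, assuming the same Keras 2.x environment the uploaded code targets; the 14-feature random matrix is a hypothetical stand-in for the scaled output of prepare_architecture.

import numpy as np
from nt_gan import GAN

x_scaled = np.random.rand(1000, 14)  # stand-in for a real min-max-scaled dataset
gan = GAN(number_of_features=14, saved_models_path='outputs',
          learning_rate=0.001, alpha_relu=0.2, dropout=0.3,
          loss='binary_crossentropy', activation='sigmoid')
gen_loss, dis_loss = gan.train(scaled_data=x_scaled, epochs=1, batch_size=128,
                               to_plot_losses=False, model_name='demo')
print(gen_loss, dis_loss)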
nt_gg.py ADDED
@@ -0,0 +1,282 @@
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
from keras.layers import Dense, Dropout, LeakyReLU
from keras.models import Sequential
from keras.optimizers import Adam
from numpy.random import randn
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from tqdm import tqdm


class GG(object):

    def __init__(self, number_of_features, saved_models_path, learning_rate, dropout, alpha):
        """
        The constructor for the General Generator class.
        :param number_of_features: Number of features in the data. Used to determine the noise dimensions
        :param saved_models_path: The folder where we save the models.
        :param learning_rate: The Adam learning rate.
        :param dropout: The dropout rate.
        :param alpha: The LeakyReLU slope.
        """
        self.saved_models_path = saved_models_path
        self.number_of_features = number_of_features

        self.generator_model = None
        self.discriminator_model = RandomForestClassifier()
        self.dropout = dropout
        self.alpha = alpha
        self.noise_dim = int(number_of_features / 2)
        self.learning_rate = learning_rate
        self.build_generator()  # build the generator
        self.losses = {'gen_loss': [], 'dis_loss_pred': [], 'dis_loss_proba': []}

    def build_generator(self):
        """
        This function creates the generator model for the GG.
        We used a fairly simple MLP architecture.
        :return:
        """
        self.generator_model = Sequential()
        # The input is the noise vector plus one extra unit for the desired confidence score
        self.generator_model.add(Dense(int(self.number_of_features * 2), input_shape=(self.noise_dim + 1, )))
        self.generator_model.add(LeakyReLU(alpha=self.alpha))

        self.generator_model.add(Dense(int(self.number_of_features * 4)))
        self.generator_model.add(LeakyReLU(alpha=self.alpha))
        self.generator_model.add(Dropout(self.dropout))

        self.generator_model.add(Dense(int(self.number_of_features * 2)))
        self.generator_model.add(LeakyReLU(alpha=self.alpha))
        self.generator_model.add(Dropout(self.dropout))

        self.generator_model.add(Dense(self.number_of_features, activation='sigmoid'))
        optimizer = Adam(lr=self.learning_rate)
        self.generator_model.compile(loss='categorical_crossentropy', optimizer=optimizer)

    def train_gg(self, x_train, y_train, epochs, batch_size, model_name, data, output_path, to_plot=False):
        """
        This function runs the training stage manually.
        :param output_path: Path to save the loss figure
        :param to_plot: Plots the losses if True
        :param x_train: the training set features
        :param y_train: the training set classes
        :param model_name: name of the model to save (for the generator)
        :param data: name of the dataset, used in the figure titles
        :param epochs: number of epochs
        :param batch_size: the batch size
        :return: trains the discriminator and the generator.
        """
        losses_path = os.path.join(self.saved_models_path, f'{model_name}_losses')
        model_file = os.path.join(self.saved_models_path, f'{model_name}_part_2_gen_weights.h5')

        # First train the discriminator
        self.train_black_box_dis(x_train, y_train)
        self.train_generator(x_train, model_file, epochs, batch_size, losses_path)
        if to_plot:
            self.plot_losses(data, output_path)

    def train_black_box_dis(self, x_train, y_train):
        """
        Trains the discriminator and saves it.
        :param x_train: the training set features
        :param y_train: the training set classes
        :return:
        """
        dis_output = os.path.join(self.saved_models_path, 'black_box_dis_model')

        # Load a previously saved black-box model if one exists, then (re)fit it on the current data
        if os.path.exists(dis_output):
            with open(dis_output, 'rb') as rf_file:
                self.discriminator_model = pickle.load(rf_file)

        self.discriminator_model.fit(x_train, y_train)
        with open(dis_output, 'wb') as rf_file:
            pickle.dump(self.discriminator_model, rf_file)

    def train_generator(self, data, model_path, epochs, start_batch_size, losses_path):
        """
        Function for training the general generator.
        :param losses_path: The file path for the loss results
        :param data: The normalized dataset
        :param model_path: The path of the model to save. Includes the epochs, batch size, etc.
        :param epochs: Number of epochs
        :param start_batch_size: Size of the batch to use.
        :return: trains the generator, then saves it and the losses recorded during training.
        """
        if os.path.exists(model_path):
            self.generator_model.load_weights(model_path)
            with open(losses_path, 'rb') as loss_file:
                self.losses = pickle.load(loss_file)
            return

        for epoch in range(epochs):  # iterates over the epochs
            np.random.shuffle(data)
            batch_size = start_batch_size
            for i in tqdm(range(0, data.shape[0], batch_size), ascii=True):  # iterate over the batches
                if data.shape[0] - i >= batch_size:
                    batch_input = data[i:i + batch_size]
                else:  # the last iteration
                    batch_input = data[i:]
                    batch_size = batch_input.shape[0]

                g_loss = self.train_generator_on_batch(batch_input)
                self.losses['gen_loss'].append(g_loss)

        self.save_generator_model(model_path, losses_path)

    def save_generator_model(self, generator_model_path, losses_path):
        """
        Saves the model and the loss data with pickle.
        :param generator_model_path: File path for the generator
        :param losses_path: File path for the losses
        :return:
        """
        self.generator_model.save_weights(generator_model_path)
        with open(losses_path, 'wb+') as loss_file:
            pickle.dump(self.losses, loss_file)

    def train_generator_on_batch(self, batch_input):
        """
        Trains the generator on a single batch. Creates the necessary input, comprised of noise and the real
        probabilities obtained from the black box, and compares it to the target output, made of the real samples
        and the probabilities made up by the generator.
        :param batch_input: a batch of real samples
        :return: the generator loss on this batch
        """
        batch_size = batch_input.shape[0]
        discriminator_probabilities = self.discriminator_model.predict_proba(batch_input)[:, -1:]
        noise = randn(batch_size, self.noise_dim)
        gen_model_input = np.hstack([noise, discriminator_probabilities])
        generated_probabilities = self.generator_model.predict(gen_model_input)[:, -1:]  # take only the probabilities
        target_output = np.hstack([batch_input, generated_probabilities])
        g_loss = self.generator_model.train_on_batch(gen_model_input, target_output)  # the actual training

        return g_loss

    def plot_discriminator_results(self, x_test, y_test, data, path):
        """
        :param x_test: Test set
        :param y_test: Test classes
        :param data: Name of the dataset used.
        :param path: Path to save the figures.
        :return: Prints the required plots.
        """
        blackbox_probs = self.discriminator_model.predict_proba(x_test)
        discriminator_predictions = self.discriminator_model.predict(x_test)
        count_1 = int(np.sum(y_test))
        count_0 = int(y_test.shape[0] - count_1)
        class_data = (['Class 0', 'Class 1'], [count_0, count_1])
        self.plot_data(class_data, path, mode='bar', x_title='Class', title=f'Distribution of classes - {data} dataset')
        self.plot_data(blackbox_probs[:, 0], path, title=f'Probabilities for test set - class 0 - {data} dataset')
        self.plot_data(blackbox_probs[:, 1], path, title=f'Probabilities for test set - class 1 - {data} dataset')

        min_confidence = blackbox_probs[:, 0].min(), blackbox_probs[:, 1].min()
        max_confidence = blackbox_probs[:, 0].max(), blackbox_probs[:, 1].max()
        mean_confidence = blackbox_probs[:, 0].mean(), blackbox_probs[:, 1].mean()

        print("Accuracy:", metrics.accuracy_score(y_test, discriminator_predictions))
        for c in [0, 1]:
            print(f'Class {c} - Min confidence: {min_confidence[c]} - Max Confidence: {max_confidence[c]} - '
                  f'Mean confidence: {mean_confidence[c]}')

    def plot_generator_results(self, data, path, num_of_instances=1000):
        """
        Creates plots for the generator results on num_of_instances generated samples.
        :param path: Path to save the figures.
        :param data: Name of the dataset used.
        :param num_of_instances: Number of samples to generate.
        :return:
        """
        sampled_proba, generated_instances = self.generate_n_samples(num_of_instances)

        proba_fake = self.discriminator_model.predict_proba(generated_instances[:, :-1])
        for c in [0, 1]:
            title = f'Confidence Score for Class {c} of Fake Samples - {data} dataset'
            self.plot_data(proba_fake[:, c], path, x_title='Confidence Score', title=title)

        black_box_confidence = proba_fake[:, 1:]
        proba_error = np.abs(sampled_proba - black_box_confidence)
        generated_classes = np.array([int(round(c)) for c in generated_instances[:, -1].tolist()]).reshape(-1, 1)
        proba_stats = np.hstack([sampled_proba, generated_classes, proba_fake[:, :1], proba_fake[:, 1:], proba_error])

        for c in [0, 1]:
            class_data = proba_stats[proba_stats[:, 1] == c]
            class_data = class_data[class_data[:, 0].argsort()]  # sort it for the plot
            title = f'Error rate for different probabilities, class {c} - {data} dataset'
            self.plot_data((class_data[:, 0], class_data[:, -1]), path, mode='plot', y_title='error rate', title=title)

    def generate_n_samples(self, n):
        """
        Function for generating n samples with uniformly distributed confidence levels.
        :param n: Number of samples
        :return: a tuple of the confidence scores used and the samples created.
        """
        noise = randn(n, self.noise_dim)
        confidences = np.random.uniform(0, 1, (n, 1))

        generator_input = np.hstack([noise, confidences])  # stack them together
        generated_instances = self.generator_model.predict(generator_input)  # create the samples

        return confidences, generated_instances

    @staticmethod
    def plot_data(data, path, mode='hist', x_title='Probabilities', y_title='# of Instances', title='Distribution'):
        """
        :param path: Path to save
        :param mode: Mode to use ('hist', 'bar', or 'plot')
        :param y_title: Title of the y axis
        :param x_title: Title of the x axis
        :param data: Data to plot
        :param title: Title of the plot
        :return: Prints a plot
        """
        plt.clf()

        if mode == 'hist':
            plt.hist(data)
        elif mode == 'bar':
            plt.bar(data[0], data[1])
        else:
            plt.plot(data[0], data[1])

        plt.title(title)
        plt.ylabel(y_title)
        plt.xlabel(x_title)
        path = os.path.join(path, title)
        plt.savefig(path)

    def plot_losses(self, data, path):
        """
        Plot the losses recorded while training
        :param data: Name of the dataset used.
        :param path: Path to save the figure.
        :return:
        """
        plt.clf()
        plt.plot(self.losses['gen_loss'])
        plt.title('Model loss')
        plt.ylabel('Loss')
        plt.xlabel('Iteration')
        plt.savefig(os.path.join(path, f'{data} dataset - general_generator_loss.png'))

    def get_error(self, num_of_instances=1000):
        """
        Calculates the error of the generator we created by measuring the difference between the probability that
        was given as input and the probability the discriminator assigns to the sample created.
        :param num_of_instances: Number of samples to generate.
        :return: An array of errors.
        """
        sampled_proba, generated_instances = self.generate_n_samples(num_of_instances)
        proba_fake = self.discriminator_model.predict_proba(generated_instances[:, :-1])
        black_box_confidence = proba_fake[:, 1:]
        return np.abs(sampled_proba - black_box_confidence)
+
outputs/empty ADDED
@@ -0,0 +1 @@