wilmars committed on
Commit
6b8bdc1
•
1 Parent(s): 23fec3c

Upload 16 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data/MercadoLibre[[:space:]]Data[[:space:]]Scientist[[:space:]]Technical[[:space:]]Challenge[[:space:]]-[[:space:]]Dataset.csv filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,13 +1,11 @@
1
  ---
2
- title: Fraud Meli App
3
- emoji: 📈
4
- colorFrom: yellow
5
  colorTo: gray
6
  sdk: gradio
7
- sdk_version: 3.47.1
8
- app_file: app.py
9
  pinned: false
10
  license: mit
11
  ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Fraud App - MELI
3
+ emoji: 🌍
4
+ colorFrom: green
5
  colorTo: gray
6
  sdk: gradio
7
+ sdk_version: 3.35.2
8
+ app_file: src/app.py
9
  pinned: false
10
  license: mit
11
  ---
 
 
data/MercadoLibre Data Scientist Technical Challenge - Dataset.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bfee87615b97787cfdbfe1798ec56d2de52a05d73c818bc836fa2168440c8ab
3
+ size 18390780
data/processed/selected_features.csv ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 0
2
+ b
3
+ c
4
+ h
5
+ j
6
+ k
7
+ l
8
+ m
9
+ o
10
+ monto
11
+ score
12
+ fecha_hour
13
+ fecha_minute
14
+ fecha_second
models/feature_engineering_pipeline.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:501863a278dd5e6cf3597123afbcacd4ed25eca8a27cf3259f3e7989236f7fdf
3
+ size 9157
models/final_pipeline.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5ccc8788d5fa15337b2ee31b0c95e6c86cc7da346c5b305d4c4b089725c7d1b
3
+ size 433034
notebooks/01-eda.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/02-feature_rngineering.ipynb ADDED
@@ -0,0 +1,1293 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "%load_ext autoreload\n",
10
+ "%autoreload 2\n",
11
+ "import pandas as pd\n",
12
+ "from sklearn.model_selection import train_test_split\n",
13
+ "from feature_engine.imputation import AddMissingIndicator, MeanMedianImputer, CategoricalImputer\n",
14
+ "from feature_engine.transformation import LogTransformer\n",
15
+ "from feature_engine.discretisation import ArbitraryDiscretiser\n",
16
+ "from feature_engine.encoding import RareLabelEncoder, OrdinalEncoder\n",
17
+ "from feature_engine.datetime import DatetimeFeatures\n",
18
+ "from utils import ScalerDf\n",
19
+ "from sklearn.pipeline import Pipeline\n",
20
+ "import joblib\n",
21
+ "import numpy as np"
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": 2,
27
+ "metadata": {},
28
+ "outputs": [
29
+ {
30
+ "name": "stdout",
31
+ "output_type": "stream",
32
+ "text": [
33
+ "(150000, 19)\n"
34
+ ]
35
+ },
36
+ {
37
+ "data": {
38
+ "text/html": [
39
+ "<div>\n",
40
+ "<style scoped>\n",
41
+ " .dataframe tbody tr th:only-of-type {\n",
42
+ " vertical-align: middle;\n",
43
+ " }\n",
44
+ "\n",
45
+ " .dataframe tbody tr th {\n",
46
+ " vertical-align: top;\n",
47
+ " }\n",
48
+ "\n",
49
+ " .dataframe thead th {\n",
50
+ " text-align: right;\n",
51
+ " }\n",
52
+ "</style>\n",
53
+ "<table border=\"1\" class=\"dataframe\">\n",
54
+ " <thead>\n",
55
+ " <tr style=\"text-align: right;\">\n",
56
+ " <th></th>\n",
57
+ " <th>a</th>\n",
58
+ " <th>b</th>\n",
59
+ " <th>c</th>\n",
60
+ " <th>d</th>\n",
61
+ " <th>e</th>\n",
62
+ " <th>f</th>\n",
63
+ " <th>g</th>\n",
64
+ " <th>h</th>\n",
65
+ " <th>j</th>\n",
66
+ " <th>k</th>\n",
67
+ " <th>l</th>\n",
68
+ " <th>m</th>\n",
69
+ " <th>n</th>\n",
70
+ " <th>o</th>\n",
71
+ " <th>p</th>\n",
72
+ " <th>fecha</th>\n",
73
+ " <th>monto</th>\n",
74
+ " <th>score</th>\n",
75
+ " <th>fraude</th>\n",
76
+ " </tr>\n",
77
+ " </thead>\n",
78
+ " <tbody>\n",
79
+ " <tr>\n",
80
+ " <th>0</th>\n",
81
+ " <td>4</td>\n",
82
+ " <td>0.6812</td>\n",
83
+ " <td>50084.12</td>\n",
84
+ " <td>50.0</td>\n",
85
+ " <td>0.000000</td>\n",
86
+ " <td>20.0</td>\n",
87
+ " <td>AR</td>\n",
88
+ " <td>1</td>\n",
89
+ " <td>cat_d26ab52</td>\n",
90
+ " <td>0.365475</td>\n",
91
+ " <td>2479.0</td>\n",
92
+ " <td>952.0</td>\n",
93
+ " <td>1</td>\n",
94
+ " <td>NaN</td>\n",
95
+ " <td>Y</td>\n",
96
+ " <td>2020-03-20 09:28:19</td>\n",
97
+ " <td>57.63</td>\n",
98
+ " <td>100</td>\n",
99
+ " <td>0</td>\n",
100
+ " </tr>\n",
101
+ " <tr>\n",
102
+ " <th>1</th>\n",
103
+ " <td>4</td>\n",
104
+ " <td>0.6694</td>\n",
105
+ " <td>66005.49</td>\n",
106
+ " <td>0.0</td>\n",
107
+ " <td>0.000000</td>\n",
108
+ " <td>2.0</td>\n",
109
+ " <td>AR</td>\n",
110
+ " <td>1</td>\n",
111
+ " <td>cat_ea962fb</td>\n",
112
+ " <td>0.612728</td>\n",
113
+ " <td>2603.0</td>\n",
114
+ " <td>105.0</td>\n",
115
+ " <td>1</td>\n",
116
+ " <td>Y</td>\n",
117
+ " <td>Y</td>\n",
118
+ " <td>2020-03-09 13:58:28</td>\n",
119
+ " <td>40.19</td>\n",
120
+ " <td>25</td>\n",
121
+ " <td>0</td>\n",
122
+ " </tr>\n",
123
+ " <tr>\n",
124
+ " <th>2</th>\n",
125
+ " <td>4</td>\n",
126
+ " <td>0.4718</td>\n",
127
+ " <td>7059.05</td>\n",
128
+ " <td>4.0</td>\n",
129
+ " <td>0.463488</td>\n",
130
+ " <td>92.0</td>\n",
131
+ " <td>BR</td>\n",
132
+ " <td>25</td>\n",
133
+ " <td>cat_4c2544e</td>\n",
134
+ " <td>0.651835</td>\n",
135
+ " <td>2153.0</td>\n",
136
+ " <td>249.0</td>\n",
137
+ " <td>1</td>\n",
138
+ " <td>Y</td>\n",
139
+ " <td>Y</td>\n",
140
+ " <td>2020-04-08 12:25:55</td>\n",
141
+ " <td>5.77</td>\n",
142
+ " <td>23</td>\n",
143
+ " <td>0</td>\n",
144
+ " </tr>\n",
145
+ " <tr>\n",
146
+ " <th>3</th>\n",
147
+ " <td>4</td>\n",
148
+ " <td>0.7260</td>\n",
149
+ " <td>10043.10</td>\n",
150
+ " <td>24.0</td>\n",
151
+ " <td>0.046845</td>\n",
152
+ " <td>43.0</td>\n",
153
+ " <td>BR</td>\n",
154
+ " <td>43</td>\n",
155
+ " <td>cat_1b59ee3</td>\n",
156
+ " <td>0.692728</td>\n",
157
+ " <td>4845.0</td>\n",
158
+ " <td>141.0</td>\n",
159
+ " <td>1</td>\n",
160
+ " <td>N</td>\n",
161
+ " <td>Y</td>\n",
162
+ " <td>2020-03-14 11:46:13</td>\n",
163
+ " <td>40.89</td>\n",
164
+ " <td>23</td>\n",
165
+ " <td>0</td>\n",
166
+ " </tr>\n",
167
+ " <tr>\n",
168
+ " <th>4</th>\n",
169
+ " <td>4</td>\n",
170
+ " <td>0.7758</td>\n",
171
+ " <td>16584.42</td>\n",
172
+ " <td>2.0</td>\n",
173
+ " <td>0.154616</td>\n",
174
+ " <td>54.0</td>\n",
175
+ " <td>BR</td>\n",
176
+ " <td>0</td>\n",
177
+ " <td>cat_9bacaa5</td>\n",
178
+ " <td>0.201354</td>\n",
179
+ " <td>2856.0</td>\n",
180
+ " <td>18.0</td>\n",
181
+ " <td>1</td>\n",
182
+ " <td>Y</td>\n",
183
+ " <td>N</td>\n",
184
+ " <td>2020-03-23 14:17:13</td>\n",
185
+ " <td>18.98</td>\n",
186
+ " <td>71</td>\n",
187
+ " <td>0</td>\n",
188
+ " </tr>\n",
189
+ " </tbody>\n",
190
+ "</table>\n",
191
+ "</div>"
192
+ ],
193
+ "text/plain": [
194
+ " a b c d e f g h j k \\\n",
195
+ "0 4 0.6812 50084.12 50.0 0.000000 20.0 AR 1 cat_d26ab52 0.365475 \n",
196
+ "1 4 0.6694 66005.49 0.0 0.000000 2.0 AR 1 cat_ea962fb 0.612728 \n",
197
+ "2 4 0.4718 7059.05 4.0 0.463488 92.0 BR 25 cat_4c2544e 0.651835 \n",
198
+ "3 4 0.7260 10043.10 24.0 0.046845 43.0 BR 43 cat_1b59ee3 0.692728 \n",
199
+ "4 4 0.7758 16584.42 2.0 0.154616 54.0 BR 0 cat_9bacaa5 0.201354 \n",
200
+ "\n",
201
+ " l m n o p fecha monto score fraude \n",
202
+ "0 2479.0 952.0 1 NaN Y 2020-03-20 09:28:19 57.63 100 0 \n",
203
+ "1 2603.0 105.0 1 Y Y 2020-03-09 13:58:28 40.19 25 0 \n",
204
+ "2 2153.0 249.0 1 Y Y 2020-04-08 12:25:55 5.77 23 0 \n",
205
+ "3 4845.0 141.0 1 N Y 2020-03-14 11:46:13 40.89 23 0 \n",
206
+ "4 2856.0 18.0 1 Y N 2020-03-23 14:17:13 18.98 71 0 "
207
+ ]
208
+ },
209
+ "execution_count": 2,
210
+ "metadata": {},
211
+ "output_type": "execute_result"
212
+ }
213
+ ],
214
+ "source": [
215
+ "data = pd.read_csv('../data/MercadoLibre Data Scientist Technical Challenge - Dataset.csv')\n",
216
+ "print(data.shape)\n",
217
+ "data.head()"
218
+ ]
219
+ },
220
+ {
221
+ "cell_type": "code",
222
+ "execution_count": 3,
223
+ "metadata": {},
224
+ "outputs": [
225
+ {
226
+ "data": {
227
+ "text/plain": [
228
+ "((135000, 18), (15000, 18))"
229
+ ]
230
+ },
231
+ "execution_count": 3,
232
+ "metadata": {},
233
+ "output_type": "execute_result"
234
+ }
235
+ ],
236
+ "source": [
237
+ "X_train, X_test, y_train, y_test = train_test_split(\n",
238
+ " data.drop(['fraude'], axis=1), # predictive variables\n",
239
+ " data['fraude'], # target\n",
240
+ " test_size=0.1, # portion of dataset to allocate to test set\n",
241
+ " random_state=0, # we are setting the seed here\n",
242
+ ")\n",
243
+ "\n",
244
+ "X_train.shape, X_test.shape"
245
+ ]
246
+ },
247
+ {
248
+ "attachments": {},
249
+ "cell_type": "markdown",
250
+ "metadata": {},
251
+ "source": [
252
+ "## missing indicator"
253
+ ]
254
+ },
255
+ {
256
+ "cell_type": "code",
257
+ "execution_count": 4,
258
+ "metadata": {},
259
+ "outputs": [],
260
+ "source": [
261
+ "## Vars with na\n",
262
+ "vars_with_na = [var for var in data.columns if data[var].isnull().sum() > 0]\n",
263
+ "indicator = AddMissingIndicator(variables=vars_with_na)\n",
264
+ "indicator.fit(X_train)\n",
265
+ "transform_data =indicator.transform(X_train)"
266
+ ]
267
+ },
268
+ {
269
+ "attachments": {},
270
+ "cell_type": "markdown",
271
+ "metadata": {},
272
+ "source": [
273
+ "## Imputation on numerical vars"
274
+ ]
275
+ },
276
+ {
277
+ "cell_type": "code",
278
+ "execution_count": 5,
279
+ "metadata": {},
280
+ "outputs": [],
281
+ "source": [
282
+ "# make list of numerical variables\n",
283
+ "num_vars = [var for var in data.columns if data[var].dtypes != 'O' and 'fraude' not in var]\n",
284
+ "num_vars_na = [var for var in num_vars if var in vars_with_na]\n",
285
+ "\n",
286
+ "imputer = MeanMedianImputer(imputation_method='median', variables=num_vars_na)\n",
287
+ "imputer.fit(transform_data)\n",
288
+ "transform_data =imputer.transform(transform_data)"
289
+ ]
290
+ },
291
+ {
292
+ "attachments": {},
293
+ "cell_type": "markdown",
294
+ "metadata": {},
295
+ "source": [
296
+ "## Transformation of numerical vars"
297
+ ]
298
+ },
299
+ {
300
+ "cell_type": "code",
301
+ "execution_count": 6,
302
+ "metadata": {},
303
+ "outputs": [],
304
+ "source": [
305
+ "log_vars =['c','monto']"
306
+ ]
307
+ },
308
+ {
309
+ "cell_type": "code",
310
+ "execution_count": 7,
311
+ "metadata": {},
312
+ "outputs": [],
313
+ "source": [
314
+ "logtranformer = LogTransformer(variables=log_vars)\n",
315
+ "logtranformer.fit(transform_data)\n",
316
+ "transform_data = logtranformer.transform(transform_data)"
317
+ ]
318
+ },
319
+ {
320
+ "attachments": {},
321
+ "cell_type": "markdown",
322
+ "metadata": {},
323
+ "source": [
324
+ "### Discretización"
325
+ ]
326
+ },
327
+ {
328
+ "cell_type": "code",
329
+ "execution_count": 8,
330
+ "metadata": {},
331
+ "outputs": [
332
+ {
333
+ "data": {
334
+ "text/html": [
335
+ "<div>\n",
336
+ "<style scoped>\n",
337
+ " .dataframe tbody tr th:only-of-type {\n",
338
+ " vertical-align: middle;\n",
339
+ " }\n",
340
+ "\n",
341
+ " .dataframe tbody tr th {\n",
342
+ " vertical-align: top;\n",
343
+ " }\n",
344
+ "\n",
345
+ " .dataframe thead th {\n",
346
+ " text-align: right;\n",
347
+ " }\n",
348
+ "</style>\n",
349
+ "<table border=\"1\" class=\"dataframe\">\n",
350
+ " <thead>\n",
351
+ " <tr style=\"text-align: right;\">\n",
352
+ " <th></th>\n",
353
+ " <th>a</th>\n",
354
+ " <th>b</th>\n",
355
+ " <th>c</th>\n",
356
+ " <th>d</th>\n",
357
+ " <th>e</th>\n",
358
+ " <th>f</th>\n",
359
+ " <th>g</th>\n",
360
+ " <th>h</th>\n",
361
+ " <th>j</th>\n",
362
+ " <th>k</th>\n",
363
+ " <th>...</th>\n",
364
+ " <th>monto</th>\n",
365
+ " <th>score</th>\n",
366
+ " <th>b_na</th>\n",
367
+ " <th>c_na</th>\n",
368
+ " <th>d_na</th>\n",
369
+ " <th>f_na</th>\n",
370
+ " <th>g_na</th>\n",
371
+ " <th>l_na</th>\n",
372
+ " <th>m_na</th>\n",
373
+ " <th>o_na</th>\n",
374
+ " </tr>\n",
375
+ " </thead>\n",
376
+ " <tbody>\n",
377
+ " <tr>\n",
378
+ " <th>135569</th>\n",
379
+ " <td>4</td>\n",
380
+ " <td>0.5217</td>\n",
381
+ " <td>9.791941</td>\n",
382
+ " <td>1.0</td>\n",
383
+ " <td>1</td>\n",
384
+ " <td>1</td>\n",
385
+ " <td>BR</td>\n",
386
+ " <td>36</td>\n",
387
+ " <td>cat_4744ece</td>\n",
388
+ " <td>0.636610</td>\n",
389
+ " <td>...</td>\n",
390
+ " <td>3.214466</td>\n",
391
+ " <td>93</td>\n",
392
+ " <td>0</td>\n",
393
+ " <td>0</td>\n",
394
+ " <td>0</td>\n",
395
+ " <td>0</td>\n",
396
+ " <td>0</td>\n",
397
+ " <td>0</td>\n",
398
+ " <td>0</td>\n",
399
+ " <td>1</td>\n",
400
+ " </tr>\n",
401
+ " <tr>\n",
402
+ " <th>78656</th>\n",
403
+ " <td>2</td>\n",
404
+ " <td>0.7554</td>\n",
405
+ " <td>10.686472</td>\n",
406
+ " <td>1.0</td>\n",
407
+ " <td>0</td>\n",
408
+ " <td>1</td>\n",
409
+ " <td>AR</td>\n",
410
+ " <td>8</td>\n",
411
+ " <td>cat_3203c7c</td>\n",
412
+ " <td>0.633266</td>\n",
413
+ " <td>...</td>\n",
414
+ " <td>3.364188</td>\n",
415
+ " <td>6</td>\n",
416
+ " <td>1</td>\n",
417
+ " <td>1</td>\n",
418
+ " <td>0</td>\n",
419
+ " <td>0</td>\n",
420
+ " <td>0</td>\n",
421
+ " <td>0</td>\n",
422
+ " <td>0</td>\n",
423
+ " <td>1</td>\n",
424
+ " </tr>\n",
425
+ " <tr>\n",
426
+ " <th>87437</th>\n",
427
+ " <td>4</td>\n",
428
+ " <td>0.5437</td>\n",
429
+ " <td>11.717906</td>\n",
430
+ " <td>1.0</td>\n",
431
+ " <td>1</td>\n",
432
+ " <td>1</td>\n",
433
+ " <td>AR</td>\n",
434
+ " <td>46</td>\n",
435
+ " <td>cat_5b785c6</td>\n",
436
+ " <td>0.735749</td>\n",
437
+ " <td>...</td>\n",
438
+ " <td>3.106826</td>\n",
439
+ " <td>55</td>\n",
440
+ " <td>0</td>\n",
441
+ " <td>0</td>\n",
442
+ " <td>0</td>\n",
443
+ " <td>0</td>\n",
444
+ " <td>0</td>\n",
445
+ " <td>0</td>\n",
446
+ " <td>0</td>\n",
447
+ " <td>1</td>\n",
448
+ " </tr>\n",
449
+ " <tr>\n",
450
+ " <th>131674</th>\n",
451
+ " <td>4</td>\n",
452
+ " <td>0.7418</td>\n",
453
+ " <td>9.755215</td>\n",
454
+ " <td>50.0</td>\n",
455
+ " <td>1</td>\n",
456
+ " <td>1</td>\n",
457
+ " <td>BR</td>\n",
458
+ " <td>9</td>\n",
459
+ " <td>cat_a8c10a4</td>\n",
460
+ " <td>0.529367</td>\n",
461
+ " <td>...</td>\n",
462
+ " <td>2.867899</td>\n",
463
+ " <td>7</td>\n",
464
+ " <td>0</td>\n",
465
+ " <td>0</td>\n",
466
+ " <td>0</td>\n",
467
+ " <td>0</td>\n",
468
+ " <td>0</td>\n",
469
+ " <td>0</td>\n",
470
+ " <td>0</td>\n",
471
+ " <td>1</td>\n",
472
+ " </tr>\n",
473
+ " <tr>\n",
474
+ " <th>45535</th>\n",
475
+ " <td>4</td>\n",
476
+ " <td>0.6463</td>\n",
477
+ " <td>10.851127</td>\n",
478
+ " <td>4.0</td>\n",
479
+ " <td>1</td>\n",
480
+ " <td>1</td>\n",
481
+ " <td>AR</td>\n",
482
+ " <td>22</td>\n",
483
+ " <td>cat_edae169</td>\n",
484
+ " <td>0.049212</td>\n",
485
+ " <td>...</td>\n",
486
+ " <td>3.383712</td>\n",
487
+ " <td>32</td>\n",
488
+ " <td>0</td>\n",
489
+ " <td>0</td>\n",
490
+ " <td>0</td>\n",
491
+ " <td>0</td>\n",
492
+ " <td>0</td>\n",
493
+ " <td>0</td>\n",
494
+ " <td>0</td>\n",
495
+ " <td>0</td>\n",
496
+ " </tr>\n",
497
+ " </tbody>\n",
498
+ "</table>\n",
499
+ "<p>5 rows × 26 columns</p>\n",
500
+ "</div>"
501
+ ],
502
+ "text/plain": [
503
+ " a b c d e f g h j k ... \\\n",
504
+ "135569 4 0.5217 9.791941 1.0 1 1 BR 36 cat_4744ece 0.636610 ... \n",
505
+ "78656 2 0.7554 10.686472 1.0 0 1 AR 8 cat_3203c7c 0.633266 ... \n",
506
+ "87437 4 0.5437 11.717906 1.0 1 1 AR 46 cat_5b785c6 0.735749 ... \n",
507
+ "131674 4 0.7418 9.755215 50.0 1 1 BR 9 cat_a8c10a4 0.529367 ... \n",
508
+ "45535 4 0.6463 10.851127 4.0 1 1 AR 22 cat_edae169 0.049212 ... \n",
509
+ "\n",
510
+ " monto score b_na c_na d_na f_na g_na l_na m_na o_na \n",
511
+ "135569 3.214466 93 0 0 0 0 0 0 0 1 \n",
512
+ "78656 3.364188 6 1 1 0 0 0 0 0 1 \n",
513
+ "87437 3.106826 55 0 0 0 0 0 0 0 1 \n",
514
+ "131674 2.867899 7 0 0 0 0 0 0 0 1 \n",
515
+ "45535 3.383712 32 0 0 0 0 0 0 0 0 \n",
516
+ "\n",
517
+ "[5 rows x 26 columns]"
518
+ ]
519
+ },
520
+ "execution_count": 8,
521
+ "metadata": {},
522
+ "output_type": "execute_result"
523
+ }
524
+ ],
525
+ "source": [
526
+ "skewed_vars = ['e', 'f']\n",
527
+ "discretizer = ArbitraryDiscretiser( binning_dict= dict(e =[-np.inf,0,np.inf], f=[-np.inf,0,np.inf]) )\n",
528
+ "discretizer.fit(transform_data)\n",
529
+ "transform_data = discretizer.transform(transform_data)\n",
530
+ "transform_data.head()"
531
+ ]
532
+ },
533
+ {
534
+ "attachments": {},
535
+ "cell_type": "markdown",
536
+ "metadata": {},
537
+ "source": [
538
+ "# Transformación de variables categóricas"
539
+ ]
540
+ },
541
+ {
542
+ "cell_type": "code",
543
+ "execution_count": 9,
544
+ "metadata": {},
545
+ "outputs": [],
546
+ "source": [
547
+ "# capture categorical variables in a list\n",
548
+ "cat_vars = [var for var in data.columns if data[var].dtypes == 'O' and 'fecha' not in var]\n",
549
+ "cat_vars_na = [var for var in cat_vars if var in vars_with_na]\n",
550
+ "categorical_imputer = CategoricalImputer(variables=cat_vars_na, imputation_method='missing', fill_value='missing')\n",
551
+ "categorical_imputer.fit(transform_data)\n",
552
+ "transform_data = categorical_imputer.transform(transform_data)"
553
+ ]
554
+ },
555
+ {
556
+ "cell_type": "code",
557
+ "execution_count": 10,
558
+ "metadata": {},
559
+ "outputs": [],
560
+ "source": [
561
+ "## Encode rare labels\n",
562
+ "rarelabel = RareLabelEncoder(variables=cat_vars, tol=0.001, n_categories=1)\n",
563
+ "rarelabel.fit(transform_data)\n",
564
+ "transform_data = rarelabel.transform(transform_data)\n"
565
+ ]
566
+ },
567
+ {
568
+ "cell_type": "code",
569
+ "execution_count": 11,
570
+ "metadata": {},
571
+ "outputs": [],
572
+ "source": [
573
+ "## ordinal encoders\n",
574
+ "ordinal_encoder = OrdinalEncoder(variables=cat_vars)\n",
575
+ "ordinal_encoder.fit(transform_data, y_train)\n",
576
+ "transform_data = ordinal_encoder.transform(transform_data)"
577
+ ]
578
+ },
579
+ {
580
+ "attachments": {},
581
+ "cell_type": "markdown",
582
+ "metadata": {},
583
+ "source": [
584
+ "## Datetime Features"
585
+ ]
586
+ },
587
+ {
588
+ "cell_type": "code",
589
+ "execution_count": 12,
590
+ "metadata": {},
591
+ "outputs": [],
592
+ "source": [
593
+ "dt_features = DatetimeFeatures(variables='fecha', features_to_extract='all')\n",
594
+ "dt_features.fit(transform_data)\n",
595
+ "transform_data = dt_features.transform(transform_data)"
596
+ ]
597
+ },
598
+ {
599
+ "attachments": {},
600
+ "cell_type": "markdown",
601
+ "metadata": {},
602
+ "source": [
603
+ "## Scaler data"
604
+ ]
605
+ },
606
+ {
607
+ "cell_type": "code",
608
+ "execution_count": 13,
609
+ "metadata": {},
610
+ "outputs": [],
611
+ "source": [
612
+ "scaler = ScalerDf(method='minmax')\n",
613
+ "scaler.fit(transform_data)\n",
614
+ "transform_data = scaler.transform(transform_data)"
615
+ ]
616
+ },
617
+ {
618
+ "cell_type": "code",
619
+ "execution_count": 14,
620
+ "metadata": {},
621
+ "outputs": [
622
+ {
623
+ "data": {
624
+ "text/plain": [
625
+ "Index(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'o',\n",
626
+ " 'p', 'monto', 'score', 'b_na', 'c_na', 'd_na', 'f_na', 'g_na', 'l_na',\n",
627
+ " 'm_na', 'o_na', 'fecha_month', 'fecha_quarter', 'fecha_semester',\n",
628
+ " 'fecha_year', 'fecha_week', 'fecha_day_of_week', 'fecha_day_of_month',\n",
629
+ " 'fecha_day_of_year', 'fecha_weekend', 'fecha_month_start',\n",
630
+ " 'fecha_month_end', 'fecha_quarter_start', 'fecha_quarter_end',\n",
631
+ " 'fecha_year_start', 'fecha_year_end', 'fecha_leap_year',\n",
632
+ " 'fecha_days_in_month', 'fecha_hour', 'fecha_minute', 'fecha_second'],\n",
633
+ " dtype='object')"
634
+ ]
635
+ },
636
+ "execution_count": 14,
637
+ "metadata": {},
638
+ "output_type": "execute_result"
639
+ }
640
+ ],
641
+ "source": [
642
+ "transform_data.columns"
643
+ ]
644
+ },
645
+ {
646
+ "attachments": {},
647
+ "cell_type": "markdown",
648
+ "metadata": {},
649
+ "source": [
650
+ "# Pongamos todo junto"
651
+ ]
652
+ },
653
+ {
654
+ "cell_type": "code",
655
+ "execution_count": 15,
656
+ "metadata": {},
657
+ "outputs": [],
658
+ "source": [
659
+ "pipeline_steps = [\n",
660
+ " ('missing_indicator',AddMissingIndicator(variables=vars_with_na)),\n",
661
+ " ('numerical_imputer', MeanMedianImputer(imputation_method='median', variables=num_vars_na)),\n",
662
+ " ('categorical_imputer', CategoricalImputer(variables=cat_vars_na, imputation_method='missing', fill_value='missing')),\n",
663
+ " ('numerical_transformation', LogTransformer(variables=log_vars)),\n",
664
+ " ('binarizer', ArbitraryDiscretiser( binning_dict= dict(e =[-np.inf,0,np.inf], f=[-np.inf,0,np.inf]))),\n",
665
+ " ('rare_label_encoder', RareLabelEncoder(variables=cat_vars, tol=0.001, n_categories=1)),\n",
666
+ " ('ordinal_encoder', OrdinalEncoder(variables=cat_vars)),\n",
667
+ " ('datetime_features', DatetimeFeatures(variables='fecha', features_to_extract='all')),\n",
668
+ " ('scaler', ScalerDf(method='minmax'))\n",
669
+ " \n",
670
+ "]"
671
+ ]
672
+ },
673
+ {
674
+ "cell_type": "code",
675
+ "execution_count": 16,
676
+ "metadata": {},
677
+ "outputs": [],
678
+ "source": [
679
+ "fraud_pipeline = Pipeline(pipeline_steps)"
680
+ ]
681
+ },
682
+ {
683
+ "cell_type": "code",
684
+ "execution_count": 17,
685
+ "metadata": {},
686
+ "outputs": [
687
+ {
688
+ "data": {
689
+ "text/html": [
690
+ "<style>#sk-container-id-1 {color: black;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"β–Έ\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"β–Ύ\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[(&#x27;missing_indicator&#x27;,\n",
691
+ " AddMissingIndicator(variables=[&#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;f&#x27;, &#x27;g&#x27;, &#x27;l&#x27;,\n",
692
+ " &#x27;m&#x27;, &#x27;o&#x27;])),\n",
693
+ " (&#x27;numerical_imputer&#x27;,\n",
694
+ " MeanMedianImputer(variables=[&#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;f&#x27;, &#x27;l&#x27;, &#x27;m&#x27;])),\n",
695
+ " (&#x27;categorical_imputer&#x27;,\n",
696
+ " CategoricalImputer(fill_value=&#x27;missing&#x27;,\n",
697
+ " variables=[&#x27;g&#x27;, &#x27;o&#x27;])),\n",
698
+ " (&#x27;numerical_transformation&#x27;,\n",
699
+ " LogTransformer(variables=[&#x27;c&#x27;, &#x27;monto&#x27;])),\n",
700
+ " (&#x27;binarizer&#x27;,\n",
701
+ " ArbitraryDiscretiser(binning_dict={&#x27;e&#x27;: [-inf, 0, inf],\n",
702
+ " &#x27;f&#x27;: [-inf, 0, inf]})),\n",
703
+ " (&#x27;rare_label_encoder&#x27;,\n",
704
+ " RareLabelEncoder(n_categories=1, tol=0.001,\n",
705
+ " variables=[&#x27;g&#x27;, &#x27;j&#x27;, &#x27;o&#x27;, &#x27;p&#x27;])),\n",
706
+ " (&#x27;ordinal_encoder&#x27;,\n",
707
+ " OrdinalEncoder(variables=[&#x27;g&#x27;, &#x27;j&#x27;, &#x27;o&#x27;, &#x27;p&#x27;])),\n",
708
+ " (&#x27;datetime_features&#x27;,\n",
709
+ " DatetimeFeatures(features_to_extract=&#x27;all&#x27;,\n",
710
+ " variables=&#x27;fecha&#x27;)),\n",
711
+ " (&#x27;scaler&#x27;, ScalerDf(method=&#x27;minmax&#x27;))])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" ><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[(&#x27;missing_indicator&#x27;,\n",
712
+ " AddMissingIndicator(variables=[&#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;f&#x27;, &#x27;g&#x27;, &#x27;l&#x27;,\n",
713
+ " &#x27;m&#x27;, &#x27;o&#x27;])),\n",
714
+ " (&#x27;numerical_imputer&#x27;,\n",
715
+ " MeanMedianImputer(variables=[&#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;f&#x27;, &#x27;l&#x27;, &#x27;m&#x27;])),\n",
716
+ " (&#x27;categorical_imputer&#x27;,\n",
717
+ " CategoricalImputer(fill_value=&#x27;missing&#x27;,\n",
718
+ " variables=[&#x27;g&#x27;, &#x27;o&#x27;])),\n",
719
+ " (&#x27;numerical_transformation&#x27;,\n",
720
+ " LogTransformer(variables=[&#x27;c&#x27;, &#x27;monto&#x27;])),\n",
721
+ " (&#x27;binarizer&#x27;,\n",
722
+ " ArbitraryDiscretiser(binning_dict={&#x27;e&#x27;: [-inf, 0, inf],\n",
723
+ " &#x27;f&#x27;: [-inf, 0, inf]})),\n",
724
+ " (&#x27;rare_label_encoder&#x27;,\n",
725
+ " RareLabelEncoder(n_categories=1, tol=0.001,\n",
726
+ " variables=[&#x27;g&#x27;, &#x27;j&#x27;, &#x27;o&#x27;, &#x27;p&#x27;])),\n",
727
+ " (&#x27;ordinal_encoder&#x27;,\n",
728
+ " OrdinalEncoder(variables=[&#x27;g&#x27;, &#x27;j&#x27;, &#x27;o&#x27;, &#x27;p&#x27;])),\n",
729
+ " (&#x27;datetime_features&#x27;,\n",
730
+ " DatetimeFeatures(features_to_extract=&#x27;all&#x27;,\n",
731
+ " variables=&#x27;fecha&#x27;)),\n",
732
+ " (&#x27;scaler&#x27;, ScalerDf(method=&#x27;minmax&#x27;))])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" ><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">AddMissingIndicator</label><div class=\"sk-toggleable__content\"><pre>AddMissingIndicator(variables=[&#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;f&#x27;, &#x27;g&#x27;, &#x27;l&#x27;, &#x27;m&#x27;, &#x27;o&#x27;])</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" ><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">MeanMedianImputer</label><div class=\"sk-toggleable__content\"><pre>MeanMedianImputer(variables=[&#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;f&#x27;, &#x27;l&#x27;, &#x27;m&#x27;])</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" ><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">CategoricalImputer</label><div class=\"sk-toggleable__content\"><pre>CategoricalImputer(fill_value=&#x27;missing&#x27;, variables=[&#x27;g&#x27;, &#x27;o&#x27;])</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-5\" type=\"checkbox\" ><label for=\"sk-estimator-id-5\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LogTransformer</label><div class=\"sk-toggleable__content\"><pre>LogTransformer(variables=[&#x27;c&#x27;, &#x27;monto&#x27;])</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator 
sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-6\" type=\"checkbox\" ><label for=\"sk-estimator-id-6\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">ArbitraryDiscretiser</label><div class=\"sk-toggleable__content\"><pre>ArbitraryDiscretiser(binning_dict={&#x27;e&#x27;: [-inf, 0, inf], &#x27;f&#x27;: [-inf, 0, inf]})</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-7\" type=\"checkbox\" ><label for=\"sk-estimator-id-7\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RareLabelEncoder</label><div class=\"sk-toggleable__content\"><pre>RareLabelEncoder(n_categories=1, tol=0.001, variables=[&#x27;g&#x27;, &#x27;j&#x27;, &#x27;o&#x27;, &#x27;p&#x27;])</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-8\" type=\"checkbox\" ><label for=\"sk-estimator-id-8\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">OrdinalEncoder</label><div class=\"sk-toggleable__content\"><pre>OrdinalEncoder(variables=[&#x27;g&#x27;, &#x27;j&#x27;, &#x27;o&#x27;, &#x27;p&#x27;])</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-9\" type=\"checkbox\" ><label for=\"sk-estimator-id-9\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">DatetimeFeatures</label><div class=\"sk-toggleable__content\"><pre>DatetimeFeatures(features_to_extract=&#x27;all&#x27;, variables=&#x27;fecha&#x27;)</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-10\" type=\"checkbox\" ><label for=\"sk-estimator-id-10\" class=\"sk-toggleable__label 
sk-toggleable__label-arrow\">ScalerDf</label><div class=\"sk-toggleable__content\"><pre>ScalerDf(method=&#x27;minmax&#x27;)</pre></div></div></div></div></div></div></div>"
733
+ ],
734
+ "text/plain": [
735
+ "Pipeline(steps=[('missing_indicator',\n",
736
+ " AddMissingIndicator(variables=['b', 'c', 'd', 'f', 'g', 'l',\n",
737
+ " 'm', 'o'])),\n",
738
+ " ('numerical_imputer',\n",
739
+ " MeanMedianImputer(variables=['b', 'c', 'd', 'f', 'l', 'm'])),\n",
740
+ " ('categorical_imputer',\n",
741
+ " CategoricalImputer(fill_value='missing',\n",
742
+ " variables=['g', 'o'])),\n",
743
+ " ('numerical_transformation',\n",
744
+ " LogTransformer(variables=['c', 'monto'])),\n",
745
+ " ('binarizer',\n",
746
+ " ArbitraryDiscretiser(binning_dict={'e': [-inf, 0, inf],\n",
747
+ " 'f': [-inf, 0, inf]})),\n",
748
+ " ('rare_label_encoder',\n",
749
+ " RareLabelEncoder(n_categories=1, tol=0.001,\n",
750
+ " variables=['g', 'j', 'o', 'p'])),\n",
751
+ " ('ordinal_encoder',\n",
752
+ " OrdinalEncoder(variables=['g', 'j', 'o', 'p'])),\n",
753
+ " ('datetime_features',\n",
754
+ " DatetimeFeatures(features_to_extract='all',\n",
755
+ " variables='fecha')),\n",
756
+ " ('scaler', ScalerDf(method='minmax'))])"
757
+ ]
758
+ },
759
+ "execution_count": 17,
760
+ "metadata": {},
761
+ "output_type": "execute_result"
762
+ }
763
+ ],
764
+ "source": [
765
+ "fraud_pipeline"
766
+ ]
767
+ },
768
+ {
769
+ "cell_type": "code",
770
+ "execution_count": 18,
771
+ "metadata": {},
772
+ "outputs": [
773
+ {
774
+ "data": {
775
+ "text/html": [
776
+ "<style>#sk-container-id-2 {color: black;}#sk-container-id-2 pre{padding: 0;}#sk-container-id-2 div.sk-toggleable {background-color: white;}#sk-container-id-2 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-2 label.sk-toggleable__label-arrow:before {content: \"β–Έ\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-2 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-2 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-2 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"β–Ύ\";}#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-2 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-2 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-2 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-2 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-2 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-2 div.sk-item {position: relative;z-index: 1;}#sk-container-id-2 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-2 div.sk-item::before, #sk-container-id-2 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-2 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-2 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-2 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-2 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-2 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-2 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-2 div.sk-label-container {text-align: center;}#sk-container-id-2 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-2 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[(&#x27;missing_indicator&#x27;,\n",
777
+ " AddMissingIndicator(variables=[&#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;f&#x27;, &#x27;g&#x27;, &#x27;l&#x27;,\n",
778
+ " &#x27;m&#x27;, &#x27;o&#x27;])),\n",
779
+ " (&#x27;numerical_imputer&#x27;,\n",
780
+ " MeanMedianImputer(variables=[&#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;f&#x27;, &#x27;l&#x27;, &#x27;m&#x27;])),\n",
781
+ " (&#x27;categorical_imputer&#x27;,\n",
782
+ " CategoricalImputer(fill_value=&#x27;missing&#x27;,\n",
783
+ " variables=[&#x27;g&#x27;, &#x27;o&#x27;])),\n",
784
+ " (&#x27;numerical_transformation&#x27;,\n",
785
+ " LogTransformer(variables=[&#x27;c&#x27;, &#x27;monto&#x27;])),\n",
786
+ " (&#x27;binarizer&#x27;,\n",
787
+ " ArbitraryDiscretiser(binning_dict={&#x27;e&#x27;: [-inf, 0, inf],\n",
788
+ " &#x27;f&#x27;: [-inf, 0, inf]})),\n",
789
+ " (&#x27;rare_label_encoder&#x27;,\n",
790
+ " RareLabelEncoder(n_categories=1, tol=0.001,\n",
791
+ " variables=[&#x27;g&#x27;, &#x27;j&#x27;, &#x27;o&#x27;, &#x27;p&#x27;])),\n",
792
+ " (&#x27;ordinal_encoder&#x27;,\n",
793
+ " OrdinalEncoder(variables=[&#x27;g&#x27;, &#x27;j&#x27;, &#x27;o&#x27;, &#x27;p&#x27;])),\n",
794
+ " (&#x27;datetime_features&#x27;,\n",
795
+ " DatetimeFeatures(features_to_extract=&#x27;all&#x27;,\n",
796
+ " variables=&#x27;fecha&#x27;)),\n",
797
+ " (&#x27;scaler&#x27;, ScalerDf(method=&#x27;minmax&#x27;))])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-11\" type=\"checkbox\" ><label for=\"sk-estimator-id-11\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[(&#x27;missing_indicator&#x27;,\n",
798
+ " AddMissingIndicator(variables=[&#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;f&#x27;, &#x27;g&#x27;, &#x27;l&#x27;,\n",
799
+ " &#x27;m&#x27;, &#x27;o&#x27;])),\n",
800
+ " (&#x27;numerical_imputer&#x27;,\n",
801
+ " MeanMedianImputer(variables=[&#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;f&#x27;, &#x27;l&#x27;, &#x27;m&#x27;])),\n",
802
+ " (&#x27;categorical_imputer&#x27;,\n",
803
+ " CategoricalImputer(fill_value=&#x27;missing&#x27;,\n",
804
+ " variables=[&#x27;g&#x27;, &#x27;o&#x27;])),\n",
805
+ " (&#x27;numerical_transformation&#x27;,\n",
806
+ " LogTransformer(variables=[&#x27;c&#x27;, &#x27;monto&#x27;])),\n",
807
+ " (&#x27;binarizer&#x27;,\n",
808
+ " ArbitraryDiscretiser(binning_dict={&#x27;e&#x27;: [-inf, 0, inf],\n",
809
+ " &#x27;f&#x27;: [-inf, 0, inf]})),\n",
810
+ " (&#x27;rare_label_encoder&#x27;,\n",
811
+ " RareLabelEncoder(n_categories=1, tol=0.001,\n",
812
+ " variables=[&#x27;g&#x27;, &#x27;j&#x27;, &#x27;o&#x27;, &#x27;p&#x27;])),\n",
813
+ " (&#x27;ordinal_encoder&#x27;,\n",
814
+ " OrdinalEncoder(variables=[&#x27;g&#x27;, &#x27;j&#x27;, &#x27;o&#x27;, &#x27;p&#x27;])),\n",
815
+ " (&#x27;datetime_features&#x27;,\n",
816
+ " DatetimeFeatures(features_to_extract=&#x27;all&#x27;,\n",
817
+ " variables=&#x27;fecha&#x27;)),\n",
818
+ " (&#x27;scaler&#x27;, ScalerDf(method=&#x27;minmax&#x27;))])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-12\" type=\"checkbox\" ><label for=\"sk-estimator-id-12\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">AddMissingIndicator</label><div class=\"sk-toggleable__content\"><pre>AddMissingIndicator(variables=[&#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;f&#x27;, &#x27;g&#x27;, &#x27;l&#x27;, &#x27;m&#x27;, &#x27;o&#x27;])</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-13\" type=\"checkbox\" ><label for=\"sk-estimator-id-13\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">MeanMedianImputer</label><div class=\"sk-toggleable__content\"><pre>MeanMedianImputer(variables=[&#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;f&#x27;, &#x27;l&#x27;, &#x27;m&#x27;])</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-14\" type=\"checkbox\" ><label for=\"sk-estimator-id-14\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">CategoricalImputer</label><div class=\"sk-toggleable__content\"><pre>CategoricalImputer(fill_value=&#x27;missing&#x27;, variables=[&#x27;g&#x27;, &#x27;o&#x27;])</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-15\" type=\"checkbox\" ><label for=\"sk-estimator-id-15\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LogTransformer</label><div class=\"sk-toggleable__content\"><pre>LogTransformer(variables=[&#x27;c&#x27;, &#x27;monto&#x27;])</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator 
sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-16\" type=\"checkbox\" ><label for=\"sk-estimator-id-16\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">ArbitraryDiscretiser</label><div class=\"sk-toggleable__content\"><pre>ArbitraryDiscretiser(binning_dict={&#x27;e&#x27;: [-inf, 0, inf], &#x27;f&#x27;: [-inf, 0, inf]})</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-17\" type=\"checkbox\" ><label for=\"sk-estimator-id-17\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RareLabelEncoder</label><div class=\"sk-toggleable__content\"><pre>RareLabelEncoder(n_categories=1, tol=0.001, variables=[&#x27;g&#x27;, &#x27;j&#x27;, &#x27;o&#x27;, &#x27;p&#x27;])</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-18\" type=\"checkbox\" ><label for=\"sk-estimator-id-18\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">OrdinalEncoder</label><div class=\"sk-toggleable__content\"><pre>OrdinalEncoder(variables=[&#x27;g&#x27;, &#x27;j&#x27;, &#x27;o&#x27;, &#x27;p&#x27;])</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-19\" type=\"checkbox\" ><label for=\"sk-estimator-id-19\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">DatetimeFeatures</label><div class=\"sk-toggleable__content\"><pre>DatetimeFeatures(features_to_extract=&#x27;all&#x27;, variables=&#x27;fecha&#x27;)</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-20\" type=\"checkbox\" ><label for=\"sk-estimator-id-20\" class=\"sk-toggleable__label 
sk-toggleable__label-arrow\">ScalerDf</label><div class=\"sk-toggleable__content\"><pre>ScalerDf(method=&#x27;minmax&#x27;)</pre></div></div></div></div></div></div></div>"
819
+ ],
820
+ "text/plain": [
821
+ "Pipeline(steps=[('missing_indicator',\n",
822
+ " AddMissingIndicator(variables=['b', 'c', 'd', 'f', 'g', 'l',\n",
823
+ " 'm', 'o'])),\n",
824
+ " ('numerical_imputer',\n",
825
+ " MeanMedianImputer(variables=['b', 'c', 'd', 'f', 'l', 'm'])),\n",
826
+ " ('categorical_imputer',\n",
827
+ " CategoricalImputer(fill_value='missing',\n",
828
+ " variables=['g', 'o'])),\n",
829
+ " ('numerical_transformation',\n",
830
+ " LogTransformer(variables=['c', 'monto'])),\n",
831
+ " ('binarizer',\n",
832
+ " ArbitraryDiscretiser(binning_dict={'e': [-inf, 0, inf],\n",
833
+ " 'f': [-inf, 0, inf]})),\n",
834
+ " ('rare_label_encoder',\n",
835
+ " RareLabelEncoder(n_categories=1, tol=0.001,\n",
836
+ " variables=['g', 'j', 'o', 'p'])),\n",
837
+ " ('ordinal_encoder',\n",
838
+ " OrdinalEncoder(variables=['g', 'j', 'o', 'p'])),\n",
839
+ " ('datetime_features',\n",
840
+ " DatetimeFeatures(features_to_extract='all',\n",
841
+ " variables='fecha')),\n",
842
+ " ('scaler', ScalerDf(method='minmax'))])"
843
+ ]
844
+ },
845
+ "execution_count": 18,
846
+ "metadata": {},
847
+ "output_type": "execute_result"
848
+ }
849
+ ],
850
+ "source": [
851
+ "fraud_pipeline.fit(X_train, y_train)"
852
+ ]
853
+ },
854
+ {
855
+ "cell_type": "code",
856
+ "execution_count": 19,
857
+ "metadata": {},
858
+ "outputs": [
859
+ {
860
+ "data": {
861
+ "text/html": [
862
+ "<div>\n",
863
+ "<style scoped>\n",
864
+ " .dataframe tbody tr th:only-of-type {\n",
865
+ " vertical-align: middle;\n",
866
+ " }\n",
867
+ "\n",
868
+ " .dataframe tbody tr th {\n",
869
+ " vertical-align: top;\n",
870
+ " }\n",
871
+ "\n",
872
+ " .dataframe thead th {\n",
873
+ " text-align: right;\n",
874
+ " }\n",
875
+ "</style>\n",
876
+ "<table border=\"1\" class=\"dataframe\">\n",
877
+ " <thead>\n",
878
+ " <tr style=\"text-align: right;\">\n",
879
+ " <th></th>\n",
880
+ " <th>a</th>\n",
881
+ " <th>b</th>\n",
882
+ " <th>c</th>\n",
883
+ " <th>d</th>\n",
884
+ " <th>e</th>\n",
885
+ " <th>f</th>\n",
886
+ " <th>g</th>\n",
887
+ " <th>h</th>\n",
888
+ " <th>j</th>\n",
889
+ " <th>k</th>\n",
890
+ " <th>...</th>\n",
891
+ " <th>fecha_month_end</th>\n",
892
+ " <th>fecha_quarter_start</th>\n",
893
+ " <th>fecha_quarter_end</th>\n",
894
+ " <th>fecha_year_start</th>\n",
895
+ " <th>fecha_year_end</th>\n",
896
+ " <th>fecha_leap_year</th>\n",
897
+ " <th>fecha_days_in_month</th>\n",
898
+ " <th>fecha_hour</th>\n",
899
+ " <th>fecha_minute</th>\n",
900
+ " <th>fecha_second</th>\n",
901
+ " </tr>\n",
902
+ " </thead>\n",
903
+ " <tbody>\n",
904
+ " <tr>\n",
905
+ " <th>135569</th>\n",
906
+ " <td>1.000000</td>\n",
907
+ " <td>0.5217</td>\n",
908
+ " <td>0.635969</td>\n",
909
+ " <td>0.02</td>\n",
910
+ " <td>1.0</td>\n",
911
+ " <td>1.0</td>\n",
912
+ " <td>0.714286</td>\n",
913
+ " <td>0.620690</td>\n",
914
+ " <td>0.458599</td>\n",
915
+ " <td>0.636612</td>\n",
916
+ " <td>...</td>\n",
917
+ " <td>0.0</td>\n",
918
+ " <td>0.0</td>\n",
919
+ " <td>0.0</td>\n",
920
+ " <td>0.0</td>\n",
921
+ " <td>0.0</td>\n",
922
+ " <td>0.0</td>\n",
923
+ " <td>1.0</td>\n",
924
+ " <td>0.391304</td>\n",
925
+ " <td>0.525424</td>\n",
926
+ " <td>0.881356</td>\n",
927
+ " </tr>\n",
928
+ " <tr>\n",
929
+ " <th>78656</th>\n",
930
+ " <td>0.333333</td>\n",
931
+ " <td>0.7554</td>\n",
932
+ " <td>0.684908</td>\n",
933
+ " <td>0.02</td>\n",
934
+ " <td>0.0</td>\n",
935
+ " <td>1.0</td>\n",
936
+ " <td>0.428571</td>\n",
937
+ " <td>0.137931</td>\n",
938
+ " <td>0.133758</td>\n",
939
+ " <td>0.633268</td>\n",
940
+ " <td>...</td>\n",
941
+ " <td>0.0</td>\n",
942
+ " <td>0.0</td>\n",
943
+ " <td>0.0</td>\n",
944
+ " <td>0.0</td>\n",
945
+ " <td>0.0</td>\n",
946
+ " <td>0.0</td>\n",
947
+ " <td>1.0</td>\n",
948
+ " <td>0.347826</td>\n",
949
+ " <td>0.254237</td>\n",
950
+ " <td>0.288136</td>\n",
951
+ " </tr>\n",
952
+ " <tr>\n",
953
+ " <th>87437</th>\n",
954
+ " <td>1.000000</td>\n",
955
+ " <td>0.5437</td>\n",
956
+ " <td>0.741337</td>\n",
957
+ " <td>0.02</td>\n",
958
+ " <td>1.0</td>\n",
959
+ " <td>1.0</td>\n",
960
+ " <td>0.428571</td>\n",
961
+ " <td>0.793103</td>\n",
962
+ " <td>0.458599</td>\n",
963
+ " <td>0.735751</td>\n",
964
+ " <td>...</td>\n",
965
+ " <td>0.0</td>\n",
966
+ " <td>1.0</td>\n",
967
+ " <td>0.0</td>\n",
968
+ " <td>0.0</td>\n",
969
+ " <td>0.0</td>\n",
970
+ " <td>0.0</td>\n",
971
+ " <td>0.0</td>\n",
972
+ " <td>0.391304</td>\n",
973
+ " <td>0.050847</td>\n",
974
+ " <td>0.338983</td>\n",
975
+ " </tr>\n",
976
+ " <tr>\n",
977
+ " <th>131674</th>\n",
978
+ " <td>1.000000</td>\n",
979
+ " <td>0.7418</td>\n",
980
+ " <td>0.633959</td>\n",
981
+ " <td>1.00</td>\n",
982
+ " <td>1.0</td>\n",
983
+ " <td>1.0</td>\n",
984
+ " <td>0.714286</td>\n",
985
+ " <td>0.155172</td>\n",
986
+ " <td>0.458599</td>\n",
987
+ " <td>0.529368</td>\n",
988
+ " <td>...</td>\n",
989
+ " <td>0.0</td>\n",
990
+ " <td>0.0</td>\n",
991
+ " <td>0.0</td>\n",
992
+ " <td>0.0</td>\n",
993
+ " <td>0.0</td>\n",
994
+ " <td>0.0</td>\n",
995
+ " <td>0.0</td>\n",
996
+ " <td>0.782609</td>\n",
997
+ " <td>0.915254</td>\n",
998
+ " <td>0.101695</td>\n",
999
+ " </tr>\n",
1000
+ " <tr>\n",
1001
+ " <th>45535</th>\n",
1002
+ " <td>1.000000</td>\n",
1003
+ " <td>0.6463</td>\n",
1004
+ " <td>0.693916</td>\n",
1005
+ " <td>0.08</td>\n",
1006
+ " <td>1.0</td>\n",
1007
+ " <td>1.0</td>\n",
1008
+ " <td>0.428571</td>\n",
1009
+ " <td>0.379310</td>\n",
1010
+ " <td>0.458599</td>\n",
1011
+ " <td>0.049208</td>\n",
1012
+ " <td>...</td>\n",
1013
+ " <td>0.0</td>\n",
1014
+ " <td>0.0</td>\n",
1015
+ " <td>0.0</td>\n",
1016
+ " <td>0.0</td>\n",
1017
+ " <td>0.0</td>\n",
1018
+ " <td>0.0</td>\n",
1019
+ " <td>0.0</td>\n",
1020
+ " <td>0.913043</td>\n",
1021
+ " <td>0.406780</td>\n",
1022
+ " <td>0.508475</td>\n",
1023
+ " </tr>\n",
1024
+ " <tr>\n",
1025
+ " <th>...</th>\n",
1026
+ " <td>...</td>\n",
1027
+ " <td>...</td>\n",
1028
+ " <td>...</td>\n",
1029
+ " <td>...</td>\n",
1030
+ " <td>...</td>\n",
1031
+ " <td>...</td>\n",
1032
+ " <td>...</td>\n",
1033
+ " <td>...</td>\n",
1034
+ " <td>...</td>\n",
1035
+ " <td>...</td>\n",
1036
+ " <td>...</td>\n",
1037
+ " <td>...</td>\n",
1038
+ " <td>...</td>\n",
1039
+ " <td>...</td>\n",
1040
+ " <td>...</td>\n",
1041
+ " <td>...</td>\n",
1042
+ " <td>...</td>\n",
1043
+ " <td>...</td>\n",
1044
+ " <td>...</td>\n",
1045
+ " <td>...</td>\n",
1046
+ " <td>...</td>\n",
1047
+ " </tr>\n",
1048
+ " <tr>\n",
1049
+ " <th>41993</th>\n",
1050
+ " <td>1.000000</td>\n",
1051
+ " <td>0.8063</td>\n",
1052
+ " <td>0.831573</td>\n",
1053
+ " <td>0.06</td>\n",
1054
+ " <td>1.0</td>\n",
1055
+ " <td>0.0</td>\n",
1056
+ " <td>0.714286</td>\n",
1057
+ " <td>0.155172</td>\n",
1058
+ " <td>0.312102</td>\n",
1059
+ " <td>0.164571</td>\n",
1060
+ " <td>...</td>\n",
1061
+ " <td>0.0</td>\n",
1062
+ " <td>0.0</td>\n",
1063
+ " <td>0.0</td>\n",
1064
+ " <td>0.0</td>\n",
1065
+ " <td>0.0</td>\n",
1066
+ " <td>0.0</td>\n",
1067
+ " <td>0.0</td>\n",
1068
+ " <td>0.826087</td>\n",
1069
+ " <td>0.067797</td>\n",
1070
+ " <td>0.762712</td>\n",
1071
+ " </tr>\n",
1072
+ " <tr>\n",
1073
+ " <th>97639</th>\n",
1074
+ " <td>1.000000</td>\n",
1075
+ " <td>0.5046</td>\n",
1076
+ " <td>0.618473</td>\n",
1077
+ " <td>0.04</td>\n",
1078
+ " <td>0.0</td>\n",
1079
+ " <td>1.0</td>\n",
1080
+ " <td>0.428571</td>\n",
1081
+ " <td>0.155172</td>\n",
1082
+ " <td>0.458599</td>\n",
1083
+ " <td>0.288001</td>\n",
1084
+ " <td>...</td>\n",
1085
+ " <td>0.0</td>\n",
1086
+ " <td>0.0</td>\n",
1087
+ " <td>0.0</td>\n",
1088
+ " <td>0.0</td>\n",
1089
+ " <td>0.0</td>\n",
1090
+ " <td>0.0</td>\n",
1091
+ " <td>0.0</td>\n",
1092
+ " <td>0.826087</td>\n",
1093
+ " <td>0.169492</td>\n",
1094
+ " <td>0.186441</td>\n",
1095
+ " </tr>\n",
1096
+ " <tr>\n",
1097
+ " <th>95939</th>\n",
1098
+ " <td>1.000000</td>\n",
1099
+ " <td>0.7233</td>\n",
1100
+ " <td>0.686591</td>\n",
1101
+ " <td>0.02</td>\n",
1102
+ " <td>0.0</td>\n",
1103
+ " <td>0.0</td>\n",
1104
+ " <td>0.714286</td>\n",
1105
+ " <td>0.034483</td>\n",
1106
+ " <td>0.866242</td>\n",
1107
+ " <td>0.585850</td>\n",
1108
+ " <td>...</td>\n",
1109
+ " <td>0.0</td>\n",
1110
+ " <td>0.0</td>\n",
1111
+ " <td>0.0</td>\n",
1112
+ " <td>0.0</td>\n",
1113
+ " <td>0.0</td>\n",
1114
+ " <td>0.0</td>\n",
1115
+ " <td>1.0</td>\n",
1116
+ " <td>0.869565</td>\n",
1117
+ " <td>0.372881</td>\n",
1118
+ " <td>0.847458</td>\n",
1119
+ " </tr>\n",
1120
+ " <tr>\n",
1121
+ " <th>117952</th>\n",
1122
+ " <td>1.000000</td>\n",
1123
+ " <td>0.7824</td>\n",
1124
+ " <td>0.710351</td>\n",
1125
+ " <td>0.96</td>\n",
1126
+ " <td>1.0</td>\n",
1127
+ " <td>1.0</td>\n",
1128
+ " <td>0.714286</td>\n",
1129
+ " <td>0.086207</td>\n",
1130
+ " <td>0.458599</td>\n",
1131
+ " <td>0.007728</td>\n",
1132
+ " <td>...</td>\n",
1133
+ " <td>0.0</td>\n",
1134
+ " <td>0.0</td>\n",
1135
+ " <td>0.0</td>\n",
1136
+ " <td>0.0</td>\n",
1137
+ " <td>0.0</td>\n",
1138
+ " <td>0.0</td>\n",
1139
+ " <td>0.0</td>\n",
1140
+ " <td>0.000000</td>\n",
1141
+ " <td>0.406780</td>\n",
1142
+ " <td>0.779661</td>\n",
1143
+ " </tr>\n",
1144
+ " <tr>\n",
1145
+ " <th>43567</th>\n",
1146
+ " <td>1.000000</td>\n",
1147
+ " <td>0.7225</td>\n",
1148
+ " <td>0.468508</td>\n",
1149
+ " <td>1.00</td>\n",
1150
+ " <td>0.0</td>\n",
1151
+ " <td>1.0</td>\n",
1152
+ " <td>0.714286</td>\n",
1153
+ " <td>0.051724</td>\n",
1154
+ " <td>0.458599</td>\n",
1155
+ " <td>0.617746</td>\n",
1156
+ " <td>...</td>\n",
1157
+ " <td>0.0</td>\n",
1158
+ " <td>0.0</td>\n",
1159
+ " <td>0.0</td>\n",
1160
+ " <td>0.0</td>\n",
1161
+ " <td>0.0</td>\n",
1162
+ " <td>0.0</td>\n",
1163
+ " <td>1.0</td>\n",
1164
+ " <td>0.913043</td>\n",
1165
+ " <td>0.288136</td>\n",
1166
+ " <td>0.305085</td>\n",
1167
+ " </tr>\n",
1168
+ " </tbody>\n",
1169
+ "</table>\n",
1170
+ "<p>135000 rows Γ— 45 columns</p>\n",
1171
+ "</div>"
1172
+ ],
1173
+ "text/plain": [
1174
+ " a b c d e f g h \\\n",
1175
+ "135569 1.000000 0.5217 0.635969 0.02 1.0 1.0 0.714286 0.620690 \n",
1176
+ "78656 0.333333 0.7554 0.684908 0.02 0.0 1.0 0.428571 0.137931 \n",
1177
+ "87437 1.000000 0.5437 0.741337 0.02 1.0 1.0 0.428571 0.793103 \n",
1178
+ "131674 1.000000 0.7418 0.633959 1.00 1.0 1.0 0.714286 0.155172 \n",
1179
+ "45535 1.000000 0.6463 0.693916 0.08 1.0 1.0 0.428571 0.379310 \n",
1180
+ "... ... ... ... ... ... ... ... ... \n",
1181
+ "41993 1.000000 0.8063 0.831573 0.06 1.0 0.0 0.714286 0.155172 \n",
1182
+ "97639 1.000000 0.5046 0.618473 0.04 0.0 1.0 0.428571 0.155172 \n",
1183
+ "95939 1.000000 0.7233 0.686591 0.02 0.0 0.0 0.714286 0.034483 \n",
1184
+ "117952 1.000000 0.7824 0.710351 0.96 1.0 1.0 0.714286 0.086207 \n",
1185
+ "43567 1.000000 0.7225 0.468508 1.00 0.0 1.0 0.714286 0.051724 \n",
1186
+ "\n",
1187
+ " j k ... fecha_month_end fecha_quarter_start \\\n",
1188
+ "135569 0.458599 0.636612 ... 0.0 0.0 \n",
1189
+ "78656 0.133758 0.633268 ... 0.0 0.0 \n",
1190
+ "87437 0.458599 0.735751 ... 0.0 1.0 \n",
1191
+ "131674 0.458599 0.529368 ... 0.0 0.0 \n",
1192
+ "45535 0.458599 0.049208 ... 0.0 0.0 \n",
1193
+ "... ... ... ... ... ... \n",
1194
+ "41993 0.312102 0.164571 ... 0.0 0.0 \n",
1195
+ "97639 0.458599 0.288001 ... 0.0 0.0 \n",
1196
+ "95939 0.866242 0.585850 ... 0.0 0.0 \n",
1197
+ "117952 0.458599 0.007728 ... 0.0 0.0 \n",
1198
+ "43567 0.458599 0.617746 ... 0.0 0.0 \n",
1199
+ "\n",
1200
+ " fecha_quarter_end fecha_year_start fecha_year_end fecha_leap_year \\\n",
1201
+ "135569 0.0 0.0 0.0 0.0 \n",
1202
+ "78656 0.0 0.0 0.0 0.0 \n",
1203
+ "87437 0.0 0.0 0.0 0.0 \n",
1204
+ "131674 0.0 0.0 0.0 0.0 \n",
1205
+ "45535 0.0 0.0 0.0 0.0 \n",
1206
+ "... ... ... ... ... \n",
1207
+ "41993 0.0 0.0 0.0 0.0 \n",
1208
+ "97639 0.0 0.0 0.0 0.0 \n",
1209
+ "95939 0.0 0.0 0.0 0.0 \n",
1210
+ "117952 0.0 0.0 0.0 0.0 \n",
1211
+ "43567 0.0 0.0 0.0 0.0 \n",
1212
+ "\n",
1213
+ " fecha_days_in_month fecha_hour fecha_minute fecha_second \n",
1214
+ "135569 1.0 0.391304 0.525424 0.881356 \n",
1215
+ "78656 1.0 0.347826 0.254237 0.288136 \n",
1216
+ "87437 0.0 0.391304 0.050847 0.338983 \n",
1217
+ "131674 0.0 0.782609 0.915254 0.101695 \n",
1218
+ "45535 0.0 0.913043 0.406780 0.508475 \n",
1219
+ "... ... ... ... ... \n",
1220
+ "41993 0.0 0.826087 0.067797 0.762712 \n",
1221
+ "97639 0.0 0.826087 0.169492 0.186441 \n",
1222
+ "95939 1.0 0.869565 0.372881 0.847458 \n",
1223
+ "117952 0.0 0.000000 0.406780 0.779661 \n",
1224
+ "43567 1.0 0.913043 0.288136 0.305085 \n",
1225
+ "\n",
1226
+ "[135000 rows x 45 columns]"
1227
+ ]
1228
+ },
1229
+ "execution_count": 19,
1230
+ "metadata": {},
1231
+ "output_type": "execute_result"
1232
+ }
1233
+ ],
1234
+ "source": [
1235
+ "fraud_pipeline.transform(X_train)"
1236
+ ]
1237
+ },
1238
+ {
1239
+ "cell_type": "code",
1240
+ "execution_count": 20,
1241
+ "metadata": {},
1242
+ "outputs": [
1243
+ {
1244
+ "data": {
1245
+ "text/plain": [
1246
+ "['../models/feature_engineering_pipeline.joblib']"
1247
+ ]
1248
+ },
1249
+ "execution_count": 20,
1250
+ "metadata": {},
1251
+ "output_type": "execute_result"
1252
+ }
1253
+ ],
1254
+ "source": [
1255
+ "joblib.dump(fraud_pipeline, '../models/feature_engineering_pipeline.joblib')"
1256
+ ]
1257
+ },
1258
+ {
1259
+ "cell_type": "code",
1260
+ "execution_count": null,
1261
+ "metadata": {},
1262
+ "outputs": [],
1263
+ "source": []
1264
+ }
1265
+ ],
1266
+ "metadata": {
1267
+ "kernelspec": {
1268
+ "display_name": "fraud-detection",
1269
+ "language": "python",
1270
+ "name": "python3"
1271
+ },
1272
+ "language_info": {
1273
+ "codemirror_mode": {
1274
+ "name": "ipython",
1275
+ "version": 3
1276
+ },
1277
+ "file_extension": ".py",
1278
+ "mimetype": "text/x-python",
1279
+ "name": "python",
1280
+ "nbconvert_exporter": "python",
1281
+ "pygments_lexer": "ipython3",
1282
+ "version": "3.10.12"
1283
+ },
1284
+ "orig_nbformat": 4,
1285
+ "vscode": {
1286
+ "interpreter": {
1287
+ "hash": "45e631c81adbf0cb55b2526738ae1a14c53cfa3f28a6ae1bee5619daf3ab935d"
1288
+ }
1289
+ }
1290
+ },
1291
+ "nbformat": 4,
1292
+ "nbformat_minor": 2
1293
+ }
notebooks/03-feature_selection.ipynb ADDED
@@ -0,0 +1,837 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import joblib\n",
10
+ "import pandas as pd\n",
11
+ "from feature_engine.selection import ProbeFeatureSelection\n",
12
+ "from sklearn.model_selection import train_test_split\n",
13
+ "from sklearn.ensemble import RandomForestClassifier\n",
14
+ "from sklearn.linear_model import LogisticRegression"
15
+ ]
16
+ },
17
+ {
18
+ "cell_type": "code",
19
+ "execution_count": 2,
20
+ "metadata": {},
21
+ "outputs": [
22
+ {
23
+ "name": "stdout",
24
+ "output_type": "stream",
25
+ "text": [
26
+ "(150000, 19)\n"
27
+ ]
28
+ },
29
+ {
30
+ "data": {
31
+ "text/html": [
32
+ "<div>\n",
33
+ "<style scoped>\n",
34
+ " .dataframe tbody tr th:only-of-type {\n",
35
+ " vertical-align: middle;\n",
36
+ " }\n",
37
+ "\n",
38
+ " .dataframe tbody tr th {\n",
39
+ " vertical-align: top;\n",
40
+ " }\n",
41
+ "\n",
42
+ " .dataframe thead th {\n",
43
+ " text-align: right;\n",
44
+ " }\n",
45
+ "</style>\n",
46
+ "<table border=\"1\" class=\"dataframe\">\n",
47
+ " <thead>\n",
48
+ " <tr style=\"text-align: right;\">\n",
49
+ " <th></th>\n",
50
+ " <th>a</th>\n",
51
+ " <th>b</th>\n",
52
+ " <th>c</th>\n",
53
+ " <th>d</th>\n",
54
+ " <th>e</th>\n",
55
+ " <th>f</th>\n",
56
+ " <th>g</th>\n",
57
+ " <th>h</th>\n",
58
+ " <th>j</th>\n",
59
+ " <th>k</th>\n",
60
+ " <th>l</th>\n",
61
+ " <th>m</th>\n",
62
+ " <th>n</th>\n",
63
+ " <th>o</th>\n",
64
+ " <th>p</th>\n",
65
+ " <th>fecha</th>\n",
66
+ " <th>monto</th>\n",
67
+ " <th>score</th>\n",
68
+ " <th>fraude</th>\n",
69
+ " </tr>\n",
70
+ " </thead>\n",
71
+ " <tbody>\n",
72
+ " <tr>\n",
73
+ " <th>0</th>\n",
74
+ " <td>4</td>\n",
75
+ " <td>0.6812</td>\n",
76
+ " <td>50084.12</td>\n",
77
+ " <td>50.0</td>\n",
78
+ " <td>0.000000</td>\n",
79
+ " <td>20.0</td>\n",
80
+ " <td>AR</td>\n",
81
+ " <td>1</td>\n",
82
+ " <td>cat_d26ab52</td>\n",
83
+ " <td>0.365475</td>\n",
84
+ " <td>2479.0</td>\n",
85
+ " <td>952.0</td>\n",
86
+ " <td>1</td>\n",
87
+ " <td>NaN</td>\n",
88
+ " <td>Y</td>\n",
89
+ " <td>2020-03-20 09:28:19</td>\n",
90
+ " <td>57.63</td>\n",
91
+ " <td>100</td>\n",
92
+ " <td>0</td>\n",
93
+ " </tr>\n",
94
+ " <tr>\n",
95
+ " <th>1</th>\n",
96
+ " <td>4</td>\n",
97
+ " <td>0.6694</td>\n",
98
+ " <td>66005.49</td>\n",
99
+ " <td>0.0</td>\n",
100
+ " <td>0.000000</td>\n",
101
+ " <td>2.0</td>\n",
102
+ " <td>AR</td>\n",
103
+ " <td>1</td>\n",
104
+ " <td>cat_ea962fb</td>\n",
105
+ " <td>0.612728</td>\n",
106
+ " <td>2603.0</td>\n",
107
+ " <td>105.0</td>\n",
108
+ " <td>1</td>\n",
109
+ " <td>Y</td>\n",
110
+ " <td>Y</td>\n",
111
+ " <td>2020-03-09 13:58:28</td>\n",
112
+ " <td>40.19</td>\n",
113
+ " <td>25</td>\n",
114
+ " <td>0</td>\n",
115
+ " </tr>\n",
116
+ " <tr>\n",
117
+ " <th>2</th>\n",
118
+ " <td>4</td>\n",
119
+ " <td>0.4718</td>\n",
120
+ " <td>7059.05</td>\n",
121
+ " <td>4.0</td>\n",
122
+ " <td>0.463488</td>\n",
123
+ " <td>92.0</td>\n",
124
+ " <td>BR</td>\n",
125
+ " <td>25</td>\n",
126
+ " <td>cat_4c2544e</td>\n",
127
+ " <td>0.651835</td>\n",
128
+ " <td>2153.0</td>\n",
129
+ " <td>249.0</td>\n",
130
+ " <td>1</td>\n",
131
+ " <td>Y</td>\n",
132
+ " <td>Y</td>\n",
133
+ " <td>2020-04-08 12:25:55</td>\n",
134
+ " <td>5.77</td>\n",
135
+ " <td>23</td>\n",
136
+ " <td>0</td>\n",
137
+ " </tr>\n",
138
+ " <tr>\n",
139
+ " <th>3</th>\n",
140
+ " <td>4</td>\n",
141
+ " <td>0.7260</td>\n",
142
+ " <td>10043.10</td>\n",
143
+ " <td>24.0</td>\n",
144
+ " <td>0.046845</td>\n",
145
+ " <td>43.0</td>\n",
146
+ " <td>BR</td>\n",
147
+ " <td>43</td>\n",
148
+ " <td>cat_1b59ee3</td>\n",
149
+ " <td>0.692728</td>\n",
150
+ " <td>4845.0</td>\n",
151
+ " <td>141.0</td>\n",
152
+ " <td>1</td>\n",
153
+ " <td>N</td>\n",
154
+ " <td>Y</td>\n",
155
+ " <td>2020-03-14 11:46:13</td>\n",
156
+ " <td>40.89</td>\n",
157
+ " <td>23</td>\n",
158
+ " <td>0</td>\n",
159
+ " </tr>\n",
160
+ " <tr>\n",
161
+ " <th>4</th>\n",
162
+ " <td>4</td>\n",
163
+ " <td>0.7758</td>\n",
164
+ " <td>16584.42</td>\n",
165
+ " <td>2.0</td>\n",
166
+ " <td>0.154616</td>\n",
167
+ " <td>54.0</td>\n",
168
+ " <td>BR</td>\n",
169
+ " <td>0</td>\n",
170
+ " <td>cat_9bacaa5</td>\n",
171
+ " <td>0.201354</td>\n",
172
+ " <td>2856.0</td>\n",
173
+ " <td>18.0</td>\n",
174
+ " <td>1</td>\n",
175
+ " <td>Y</td>\n",
176
+ " <td>N</td>\n",
177
+ " <td>2020-03-23 14:17:13</td>\n",
178
+ " <td>18.98</td>\n",
179
+ " <td>71</td>\n",
180
+ " <td>0</td>\n",
181
+ " </tr>\n",
182
+ " </tbody>\n",
183
+ "</table>\n",
184
+ "</div>"
185
+ ],
186
+ "text/plain": [
187
+ " a b c d e f g h j k \\\n",
188
+ "0 4 0.6812 50084.12 50.0 0.000000 20.0 AR 1 cat_d26ab52 0.365475 \n",
189
+ "1 4 0.6694 66005.49 0.0 0.000000 2.0 AR 1 cat_ea962fb 0.612728 \n",
190
+ "2 4 0.4718 7059.05 4.0 0.463488 92.0 BR 25 cat_4c2544e 0.651835 \n",
191
+ "3 4 0.7260 10043.10 24.0 0.046845 43.0 BR 43 cat_1b59ee3 0.692728 \n",
192
+ "4 4 0.7758 16584.42 2.0 0.154616 54.0 BR 0 cat_9bacaa5 0.201354 \n",
193
+ "\n",
194
+ " l m n o p fecha monto score fraude \n",
195
+ "0 2479.0 952.0 1 NaN Y 2020-03-20 09:28:19 57.63 100 0 \n",
196
+ "1 2603.0 105.0 1 Y Y 2020-03-09 13:58:28 40.19 25 0 \n",
197
+ "2 2153.0 249.0 1 Y Y 2020-04-08 12:25:55 5.77 23 0 \n",
198
+ "3 4845.0 141.0 1 N Y 2020-03-14 11:46:13 40.89 23 0 \n",
199
+ "4 2856.0 18.0 1 Y N 2020-03-23 14:17:13 18.98 71 0 "
200
+ ]
201
+ },
202
+ "execution_count": 2,
203
+ "metadata": {},
204
+ "output_type": "execute_result"
205
+ }
206
+ ],
207
+ "source": [
208
+ "data = pd.read_csv('../data/MercadoLibre Data Scientist Technical Challenge - Dataset.csv')\n",
209
+ "print(data.shape)\n",
210
+ "data.head()"
211
+ ]
212
+ },
213
+ {
214
+ "cell_type": "code",
215
+ "execution_count": 3,
216
+ "metadata": {},
217
+ "outputs": [
218
+ {
219
+ "data": {
220
+ "text/plain": [
221
+ "((135000, 18), (15000, 18))"
222
+ ]
223
+ },
224
+ "execution_count": 3,
225
+ "metadata": {},
226
+ "output_type": "execute_result"
227
+ }
228
+ ],
229
+ "source": [
230
+ "X_train, X_test, y_train, y_test = train_test_split(\n",
231
+ " data.drop(['fraude'], axis=1), # predictive variables\n",
232
+ " data['fraude'], # target\n",
233
+ " test_size=0.1, # portion of dataset to allocate to test set\n",
234
+ " random_state=0, # we are setting the seed here\n",
235
+ ")\n",
236
+ "\n",
237
+ "X_train.shape, X_test.shape"
238
+ ]
239
+ },
240
+ {
241
+ "cell_type": "code",
242
+ "execution_count": 4,
243
+ "metadata": {},
244
+ "outputs": [],
245
+ "source": [
246
+ "fraud_pipeline = joblib.load('../models/feature_engineering_pipeline.joblib')"
247
+ ]
248
+ },
249
+ {
250
+ "cell_type": "code",
251
+ "execution_count": 5,
252
+ "metadata": {},
253
+ "outputs": [
254
+ {
255
+ "data": {
256
+ "text/html": [
257
+ "<style>#sk-container-id-1 {color: black;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[(&#x27;missing_indicator&#x27;,\n",
258
+ " AddMissingIndicator(variables=[&#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;f&#x27;, &#x27;g&#x27;, &#x27;l&#x27;,\n",
259
+ " &#x27;m&#x27;, &#x27;o&#x27;])),\n",
260
+ " (&#x27;numerical_imputer&#x27;,\n",
261
+ " MeanMedianImputer(variables=[&#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;f&#x27;, &#x27;l&#x27;, &#x27;m&#x27;])),\n",
262
+ " (&#x27;categorical_imputer&#x27;,\n",
263
+ " CategoricalImputer(fill_value=&#x27;missing&#x27;,\n",
264
+ " variables=[&#x27;g&#x27;, &#x27;o&#x27;])),\n",
265
+ " (&#x27;numerical_transformation&#x27;,\n",
266
+ " LogTransformer(variables=[&#x27;c&#x27;, &#x27;monto&#x27;])),\n",
267
+ " (&#x27;binarizer&#x27;,\n",
268
+ " ArbitraryDiscretiser(binning_dict={&#x27;e&#x27;: [-inf, 0, inf],\n",
269
+ " &#x27;f&#x27;: [-inf, 0, inf]})),\n",
270
+ " (&#x27;rare_label_encoder&#x27;,\n",
271
+ " RareLabelEncoder(n_categories=1, tol=0.001,\n",
272
+ " variables=[&#x27;g&#x27;, &#x27;j&#x27;, &#x27;o&#x27;, &#x27;p&#x27;])),\n",
273
+ " (&#x27;ordinal_encoder&#x27;,\n",
274
+ " OrdinalEncoder(variables=[&#x27;g&#x27;, &#x27;j&#x27;, &#x27;o&#x27;, &#x27;p&#x27;])),\n",
275
+ " (&#x27;datetime_features&#x27;,\n",
276
+ " DatetimeFeatures(features_to_extract=&#x27;all&#x27;,\n",
277
+ " variables=&#x27;fecha&#x27;)),\n",
278
+ " (&#x27;scaler&#x27;, ScalerDf(method=&#x27;minmax&#x27;))])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" ><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[(&#x27;missing_indicator&#x27;,\n",
279
+ " AddMissingIndicator(variables=[&#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;f&#x27;, &#x27;g&#x27;, &#x27;l&#x27;,\n",
280
+ " &#x27;m&#x27;, &#x27;o&#x27;])),\n",
281
+ " (&#x27;numerical_imputer&#x27;,\n",
282
+ " MeanMedianImputer(variables=[&#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;f&#x27;, &#x27;l&#x27;, &#x27;m&#x27;])),\n",
283
+ " (&#x27;categorical_imputer&#x27;,\n",
284
+ " CategoricalImputer(fill_value=&#x27;missing&#x27;,\n",
285
+ " variables=[&#x27;g&#x27;, &#x27;o&#x27;])),\n",
286
+ " (&#x27;numerical_transformation&#x27;,\n",
287
+ " LogTransformer(variables=[&#x27;c&#x27;, &#x27;monto&#x27;])),\n",
288
+ " (&#x27;binarizer&#x27;,\n",
289
+ " ArbitraryDiscretiser(binning_dict={&#x27;e&#x27;: [-inf, 0, inf],\n",
290
+ " &#x27;f&#x27;: [-inf, 0, inf]})),\n",
291
+ " (&#x27;rare_label_encoder&#x27;,\n",
292
+ " RareLabelEncoder(n_categories=1, tol=0.001,\n",
293
+ " variables=[&#x27;g&#x27;, &#x27;j&#x27;, &#x27;o&#x27;, &#x27;p&#x27;])),\n",
294
+ " (&#x27;ordinal_encoder&#x27;,\n",
295
+ " OrdinalEncoder(variables=[&#x27;g&#x27;, &#x27;j&#x27;, &#x27;o&#x27;, &#x27;p&#x27;])),\n",
296
+ " (&#x27;datetime_features&#x27;,\n",
297
+ " DatetimeFeatures(features_to_extract=&#x27;all&#x27;,\n",
298
+ " variables=&#x27;fecha&#x27;)),\n",
299
+ " (&#x27;scaler&#x27;, ScalerDf(method=&#x27;minmax&#x27;))])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" ><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">AddMissingIndicator</label><div class=\"sk-toggleable__content\"><pre>AddMissingIndicator(variables=[&#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;f&#x27;, &#x27;g&#x27;, &#x27;l&#x27;, &#x27;m&#x27;, &#x27;o&#x27;])</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" ><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">MeanMedianImputer</label><div class=\"sk-toggleable__content\"><pre>MeanMedianImputer(variables=[&#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;f&#x27;, &#x27;l&#x27;, &#x27;m&#x27;])</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" ><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">CategoricalImputer</label><div class=\"sk-toggleable__content\"><pre>CategoricalImputer(fill_value=&#x27;missing&#x27;, variables=[&#x27;g&#x27;, &#x27;o&#x27;])</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-5\" type=\"checkbox\" ><label for=\"sk-estimator-id-5\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LogTransformer</label><div class=\"sk-toggleable__content\"><pre>LogTransformer(variables=[&#x27;c&#x27;, &#x27;monto&#x27;])</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator 
sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-6\" type=\"checkbox\" ><label for=\"sk-estimator-id-6\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">ArbitraryDiscretiser</label><div class=\"sk-toggleable__content\"><pre>ArbitraryDiscretiser(binning_dict={&#x27;e&#x27;: [-inf, 0, inf], &#x27;f&#x27;: [-inf, 0, inf]})</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-7\" type=\"checkbox\" ><label for=\"sk-estimator-id-7\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RareLabelEncoder</label><div class=\"sk-toggleable__content\"><pre>RareLabelEncoder(n_categories=1, tol=0.001, variables=[&#x27;g&#x27;, &#x27;j&#x27;, &#x27;o&#x27;, &#x27;p&#x27;])</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-8\" type=\"checkbox\" ><label for=\"sk-estimator-id-8\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">OrdinalEncoder</label><div class=\"sk-toggleable__content\"><pre>OrdinalEncoder(variables=[&#x27;g&#x27;, &#x27;j&#x27;, &#x27;o&#x27;, &#x27;p&#x27;])</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-9\" type=\"checkbox\" ><label for=\"sk-estimator-id-9\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">DatetimeFeatures</label><div class=\"sk-toggleable__content\"><pre>DatetimeFeatures(features_to_extract=&#x27;all&#x27;, variables=&#x27;fecha&#x27;)</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-10\" type=\"checkbox\" ><label for=\"sk-estimator-id-10\" class=\"sk-toggleable__label 
sk-toggleable__label-arrow\">ScalerDf</label><div class=\"sk-toggleable__content\"><pre>ScalerDf(method=&#x27;minmax&#x27;)</pre></div></div></div></div></div></div></div>"
300
+ ],
301
+ "text/plain": [
302
+ "Pipeline(steps=[('missing_indicator',\n",
303
+ " AddMissingIndicator(variables=['b', 'c', 'd', 'f', 'g', 'l',\n",
304
+ " 'm', 'o'])),\n",
305
+ " ('numerical_imputer',\n",
306
+ " MeanMedianImputer(variables=['b', 'c', 'd', 'f', 'l', 'm'])),\n",
307
+ " ('categorical_imputer',\n",
308
+ " CategoricalImputer(fill_value='missing',\n",
309
+ " variables=['g', 'o'])),\n",
310
+ " ('numerical_transformation',\n",
311
+ " LogTransformer(variables=['c', 'monto'])),\n",
312
+ " ('binarizer',\n",
313
+ " ArbitraryDiscretiser(binning_dict={'e': [-inf, 0, inf],\n",
314
+ " 'f': [-inf, 0, inf]})),\n",
315
+ " ('rare_label_encoder',\n",
316
+ " RareLabelEncoder(n_categories=1, tol=0.001,\n",
317
+ " variables=['g', 'j', 'o', 'p'])),\n",
318
+ " ('ordinal_encoder',\n",
319
+ " OrdinalEncoder(variables=['g', 'j', 'o', 'p'])),\n",
320
+ " ('datetime_features',\n",
321
+ " DatetimeFeatures(features_to_extract='all',\n",
322
+ " variables='fecha')),\n",
323
+ " ('scaler', ScalerDf(method='minmax'))])"
324
+ ]
325
+ },
326
+ "execution_count": 5,
327
+ "metadata": {},
328
+ "output_type": "execute_result"
329
+ }
330
+ ],
331
+ "source": [
332
+ "fraud_pipeline"
333
+ ]
334
+ },
335
+ {
336
+ "cell_type": "code",
337
+ "execution_count": 6,
338
+ "metadata": {},
339
+ "outputs": [],
340
+ "source": [
341
+ "X_train_transformed = fraud_pipeline.transform(X_train)"
342
+ ]
343
+ },
344
+ {
345
+ "cell_type": "code",
346
+ "execution_count": 7,
347
+ "metadata": {},
348
+ "outputs": [
349
+ {
350
+ "data": {
351
+ "text/html": [
352
+ "<div>\n",
353
+ "<style scoped>\n",
354
+ " .dataframe tbody tr th:only-of-type {\n",
355
+ " vertical-align: middle;\n",
356
+ " }\n",
357
+ "\n",
358
+ " .dataframe tbody tr th {\n",
359
+ " vertical-align: top;\n",
360
+ " }\n",
361
+ "\n",
362
+ " .dataframe thead th {\n",
363
+ " text-align: right;\n",
364
+ " }\n",
365
+ "</style>\n",
366
+ "<table border=\"1\" class=\"dataframe\">\n",
367
+ " <thead>\n",
368
+ " <tr style=\"text-align: right;\">\n",
369
+ " <th></th>\n",
370
+ " <th>a</th>\n",
371
+ " <th>b</th>\n",
372
+ " <th>c</th>\n",
373
+ " <th>d</th>\n",
374
+ " <th>e</th>\n",
375
+ " <th>f</th>\n",
376
+ " <th>g</th>\n",
377
+ " <th>h</th>\n",
378
+ " <th>j</th>\n",
379
+ " <th>k</th>\n",
380
+ " <th>...</th>\n",
381
+ " <th>fecha_month_end</th>\n",
382
+ " <th>fecha_quarter_start</th>\n",
383
+ " <th>fecha_quarter_end</th>\n",
384
+ " <th>fecha_year_start</th>\n",
385
+ " <th>fecha_year_end</th>\n",
386
+ " <th>fecha_leap_year</th>\n",
387
+ " <th>fecha_days_in_month</th>\n",
388
+ " <th>fecha_hour</th>\n",
389
+ " <th>fecha_minute</th>\n",
390
+ " <th>fecha_second</th>\n",
391
+ " </tr>\n",
392
+ " </thead>\n",
393
+ " <tbody>\n",
394
+ " <tr>\n",
395
+ " <th>135569</th>\n",
396
+ " <td>1.000000</td>\n",
397
+ " <td>0.5217</td>\n",
398
+ " <td>0.635969</td>\n",
399
+ " <td>0.02</td>\n",
400
+ " <td>1.0</td>\n",
401
+ " <td>1.0</td>\n",
402
+ " <td>0.714286</td>\n",
403
+ " <td>0.620690</td>\n",
404
+ " <td>0.458599</td>\n",
405
+ " <td>0.636612</td>\n",
406
+ " <td>...</td>\n",
407
+ " <td>0.0</td>\n",
408
+ " <td>0.0</td>\n",
409
+ " <td>0.0</td>\n",
410
+ " <td>0.0</td>\n",
411
+ " <td>0.0</td>\n",
412
+ " <td>0.0</td>\n",
413
+ " <td>1.0</td>\n",
414
+ " <td>0.391304</td>\n",
415
+ " <td>0.525424</td>\n",
416
+ " <td>0.881356</td>\n",
417
+ " </tr>\n",
418
+ " <tr>\n",
419
+ " <th>78656</th>\n",
420
+ " <td>0.333333</td>\n",
421
+ " <td>0.7554</td>\n",
422
+ " <td>0.684908</td>\n",
423
+ " <td>0.02</td>\n",
424
+ " <td>0.0</td>\n",
425
+ " <td>1.0</td>\n",
426
+ " <td>0.428571</td>\n",
427
+ " <td>0.137931</td>\n",
428
+ " <td>0.133758</td>\n",
429
+ " <td>0.633268</td>\n",
430
+ " <td>...</td>\n",
431
+ " <td>0.0</td>\n",
432
+ " <td>0.0</td>\n",
433
+ " <td>0.0</td>\n",
434
+ " <td>0.0</td>\n",
435
+ " <td>0.0</td>\n",
436
+ " <td>0.0</td>\n",
437
+ " <td>1.0</td>\n",
438
+ " <td>0.347826</td>\n",
439
+ " <td>0.254237</td>\n",
440
+ " <td>0.288136</td>\n",
441
+ " </tr>\n",
442
+ " <tr>\n",
443
+ " <th>87437</th>\n",
444
+ " <td>1.000000</td>\n",
445
+ " <td>0.5437</td>\n",
446
+ " <td>0.741337</td>\n",
447
+ " <td>0.02</td>\n",
448
+ " <td>1.0</td>\n",
449
+ " <td>1.0</td>\n",
450
+ " <td>0.428571</td>\n",
451
+ " <td>0.793103</td>\n",
452
+ " <td>0.458599</td>\n",
453
+ " <td>0.735751</td>\n",
454
+ " <td>...</td>\n",
455
+ " <td>0.0</td>\n",
456
+ " <td>1.0</td>\n",
457
+ " <td>0.0</td>\n",
458
+ " <td>0.0</td>\n",
459
+ " <td>0.0</td>\n",
460
+ " <td>0.0</td>\n",
461
+ " <td>0.0</td>\n",
462
+ " <td>0.391304</td>\n",
463
+ " <td>0.050847</td>\n",
464
+ " <td>0.338983</td>\n",
465
+ " </tr>\n",
466
+ " <tr>\n",
467
+ " <th>131674</th>\n",
468
+ " <td>1.000000</td>\n",
469
+ " <td>0.7418</td>\n",
470
+ " <td>0.633959</td>\n",
471
+ " <td>1.00</td>\n",
472
+ " <td>1.0</td>\n",
473
+ " <td>1.0</td>\n",
474
+ " <td>0.714286</td>\n",
475
+ " <td>0.155172</td>\n",
476
+ " <td>0.458599</td>\n",
477
+ " <td>0.529368</td>\n",
478
+ " <td>...</td>\n",
479
+ " <td>0.0</td>\n",
480
+ " <td>0.0</td>\n",
481
+ " <td>0.0</td>\n",
482
+ " <td>0.0</td>\n",
483
+ " <td>0.0</td>\n",
484
+ " <td>0.0</td>\n",
485
+ " <td>0.0</td>\n",
486
+ " <td>0.782609</td>\n",
487
+ " <td>0.915254</td>\n",
488
+ " <td>0.101695</td>\n",
489
+ " </tr>\n",
490
+ " <tr>\n",
491
+ " <th>45535</th>\n",
492
+ " <td>1.000000</td>\n",
493
+ " <td>0.6463</td>\n",
494
+ " <td>0.693916</td>\n",
495
+ " <td>0.08</td>\n",
496
+ " <td>1.0</td>\n",
497
+ " <td>1.0</td>\n",
498
+ " <td>0.428571</td>\n",
499
+ " <td>0.379310</td>\n",
500
+ " <td>0.458599</td>\n",
501
+ " <td>0.049208</td>\n",
502
+ " <td>...</td>\n",
503
+ " <td>0.0</td>\n",
504
+ " <td>0.0</td>\n",
505
+ " <td>0.0</td>\n",
506
+ " <td>0.0</td>\n",
507
+ " <td>0.0</td>\n",
508
+ " <td>0.0</td>\n",
509
+ " <td>0.0</td>\n",
510
+ " <td>0.913043</td>\n",
511
+ " <td>0.406780</td>\n",
512
+ " <td>0.508475</td>\n",
513
+ " </tr>\n",
514
+ " <tr>\n",
515
+ " <th>...</th>\n",
516
+ " <td>...</td>\n",
517
+ " <td>...</td>\n",
518
+ " <td>...</td>\n",
519
+ " <td>...</td>\n",
520
+ " <td>...</td>\n",
521
+ " <td>...</td>\n",
522
+ " <td>...</td>\n",
523
+ " <td>...</td>\n",
524
+ " <td>...</td>\n",
525
+ " <td>...</td>\n",
526
+ " <td>...</td>\n",
527
+ " <td>...</td>\n",
528
+ " <td>...</td>\n",
529
+ " <td>...</td>\n",
530
+ " <td>...</td>\n",
531
+ " <td>...</td>\n",
532
+ " <td>...</td>\n",
533
+ " <td>...</td>\n",
534
+ " <td>...</td>\n",
535
+ " <td>...</td>\n",
536
+ " <td>...</td>\n",
537
+ " </tr>\n",
538
+ " <tr>\n",
539
+ " <th>41993</th>\n",
540
+ " <td>1.000000</td>\n",
541
+ " <td>0.8063</td>\n",
542
+ " <td>0.831573</td>\n",
543
+ " <td>0.06</td>\n",
544
+ " <td>1.0</td>\n",
545
+ " <td>0.0</td>\n",
546
+ " <td>0.714286</td>\n",
547
+ " <td>0.155172</td>\n",
548
+ " <td>0.312102</td>\n",
549
+ " <td>0.164571</td>\n",
550
+ " <td>...</td>\n",
551
+ " <td>0.0</td>\n",
552
+ " <td>0.0</td>\n",
553
+ " <td>0.0</td>\n",
554
+ " <td>0.0</td>\n",
555
+ " <td>0.0</td>\n",
556
+ " <td>0.0</td>\n",
557
+ " <td>0.0</td>\n",
558
+ " <td>0.826087</td>\n",
559
+ " <td>0.067797</td>\n",
560
+ " <td>0.762712</td>\n",
561
+ " </tr>\n",
562
+ " <tr>\n",
563
+ " <th>97639</th>\n",
564
+ " <td>1.000000</td>\n",
565
+ " <td>0.5046</td>\n",
566
+ " <td>0.618473</td>\n",
567
+ " <td>0.04</td>\n",
568
+ " <td>0.0</td>\n",
569
+ " <td>1.0</td>\n",
570
+ " <td>0.428571</td>\n",
571
+ " <td>0.155172</td>\n",
572
+ " <td>0.458599</td>\n",
573
+ " <td>0.288001</td>\n",
574
+ " <td>...</td>\n",
575
+ " <td>0.0</td>\n",
576
+ " <td>0.0</td>\n",
577
+ " <td>0.0</td>\n",
578
+ " <td>0.0</td>\n",
579
+ " <td>0.0</td>\n",
580
+ " <td>0.0</td>\n",
581
+ " <td>0.0</td>\n",
582
+ " <td>0.826087</td>\n",
583
+ " <td>0.169492</td>\n",
584
+ " <td>0.186441</td>\n",
585
+ " </tr>\n",
586
+ " <tr>\n",
587
+ " <th>95939</th>\n",
588
+ " <td>1.000000</td>\n",
589
+ " <td>0.7233</td>\n",
590
+ " <td>0.686591</td>\n",
591
+ " <td>0.02</td>\n",
592
+ " <td>0.0</td>\n",
593
+ " <td>0.0</td>\n",
594
+ " <td>0.714286</td>\n",
595
+ " <td>0.034483</td>\n",
596
+ " <td>0.866242</td>\n",
597
+ " <td>0.585850</td>\n",
598
+ " <td>...</td>\n",
599
+ " <td>0.0</td>\n",
600
+ " <td>0.0</td>\n",
601
+ " <td>0.0</td>\n",
602
+ " <td>0.0</td>\n",
603
+ " <td>0.0</td>\n",
604
+ " <td>0.0</td>\n",
605
+ " <td>1.0</td>\n",
606
+ " <td>0.869565</td>\n",
607
+ " <td>0.372881</td>\n",
608
+ " <td>0.847458</td>\n",
609
+ " </tr>\n",
610
+ " <tr>\n",
611
+ " <th>117952</th>\n",
612
+ " <td>1.000000</td>\n",
613
+ " <td>0.7824</td>\n",
614
+ " <td>0.710351</td>\n",
615
+ " <td>0.96</td>\n",
616
+ " <td>1.0</td>\n",
617
+ " <td>1.0</td>\n",
618
+ " <td>0.714286</td>\n",
619
+ " <td>0.086207</td>\n",
620
+ " <td>0.458599</td>\n",
621
+ " <td>0.007728</td>\n",
622
+ " <td>...</td>\n",
623
+ " <td>0.0</td>\n",
624
+ " <td>0.0</td>\n",
625
+ " <td>0.0</td>\n",
626
+ " <td>0.0</td>\n",
627
+ " <td>0.0</td>\n",
628
+ " <td>0.0</td>\n",
629
+ " <td>0.0</td>\n",
630
+ " <td>0.000000</td>\n",
631
+ " <td>0.406780</td>\n",
632
+ " <td>0.779661</td>\n",
633
+ " </tr>\n",
634
+ " <tr>\n",
635
+ " <th>43567</th>\n",
636
+ " <td>1.000000</td>\n",
637
+ " <td>0.7225</td>\n",
638
+ " <td>0.468508</td>\n",
639
+ " <td>1.00</td>\n",
640
+ " <td>0.0</td>\n",
641
+ " <td>1.0</td>\n",
642
+ " <td>0.714286</td>\n",
643
+ " <td>0.051724</td>\n",
644
+ " <td>0.458599</td>\n",
645
+ " <td>0.617746</td>\n",
646
+ " <td>...</td>\n",
647
+ " <td>0.0</td>\n",
648
+ " <td>0.0</td>\n",
649
+ " <td>0.0</td>\n",
650
+ " <td>0.0</td>\n",
651
+ " <td>0.0</td>\n",
652
+ " <td>0.0</td>\n",
653
+ " <td>1.0</td>\n",
654
+ " <td>0.913043</td>\n",
655
+ " <td>0.288136</td>\n",
656
+ " <td>0.305085</td>\n",
657
+ " </tr>\n",
658
+ " </tbody>\n",
659
+ "</table>\n",
660
+ "<p>135000 rows × 45 columns</p>\n",
661
+ "</div>"
662
+ ],
663
+ "text/plain": [
664
+ " a b c d e f g h \\\n",
665
+ "135569 1.000000 0.5217 0.635969 0.02 1.0 1.0 0.714286 0.620690 \n",
666
+ "78656 0.333333 0.7554 0.684908 0.02 0.0 1.0 0.428571 0.137931 \n",
667
+ "87437 1.000000 0.5437 0.741337 0.02 1.0 1.0 0.428571 0.793103 \n",
668
+ "131674 1.000000 0.7418 0.633959 1.00 1.0 1.0 0.714286 0.155172 \n",
669
+ "45535 1.000000 0.6463 0.693916 0.08 1.0 1.0 0.428571 0.379310 \n",
670
+ "... ... ... ... ... ... ... ... ... \n",
671
+ "41993 1.000000 0.8063 0.831573 0.06 1.0 0.0 0.714286 0.155172 \n",
672
+ "97639 1.000000 0.5046 0.618473 0.04 0.0 1.0 0.428571 0.155172 \n",
673
+ "95939 1.000000 0.7233 0.686591 0.02 0.0 0.0 0.714286 0.034483 \n",
674
+ "117952 1.000000 0.7824 0.710351 0.96 1.0 1.0 0.714286 0.086207 \n",
675
+ "43567 1.000000 0.7225 0.468508 1.00 0.0 1.0 0.714286 0.051724 \n",
676
+ "\n",
677
+ " j k ... fecha_month_end fecha_quarter_start \\\n",
678
+ "135569 0.458599 0.636612 ... 0.0 0.0 \n",
679
+ "78656 0.133758 0.633268 ... 0.0 0.0 \n",
680
+ "87437 0.458599 0.735751 ... 0.0 1.0 \n",
681
+ "131674 0.458599 0.529368 ... 0.0 0.0 \n",
682
+ "45535 0.458599 0.049208 ... 0.0 0.0 \n",
683
+ "... ... ... ... ... ... \n",
684
+ "41993 0.312102 0.164571 ... 0.0 0.0 \n",
685
+ "97639 0.458599 0.288001 ... 0.0 0.0 \n",
686
+ "95939 0.866242 0.585850 ... 0.0 0.0 \n",
687
+ "117952 0.458599 0.007728 ... 0.0 0.0 \n",
688
+ "43567 0.458599 0.617746 ... 0.0 0.0 \n",
689
+ "\n",
690
+ " fecha_quarter_end fecha_year_start fecha_year_end fecha_leap_year \\\n",
691
+ "135569 0.0 0.0 0.0 0.0 \n",
692
+ "78656 0.0 0.0 0.0 0.0 \n",
693
+ "87437 0.0 0.0 0.0 0.0 \n",
694
+ "131674 0.0 0.0 0.0 0.0 \n",
695
+ "45535 0.0 0.0 0.0 0.0 \n",
696
+ "... ... ... ... ... \n",
697
+ "41993 0.0 0.0 0.0 0.0 \n",
698
+ "97639 0.0 0.0 0.0 0.0 \n",
699
+ "95939 0.0 0.0 0.0 0.0 \n",
700
+ "117952 0.0 0.0 0.0 0.0 \n",
701
+ "43567 0.0 0.0 0.0 0.0 \n",
702
+ "\n",
703
+ " fecha_days_in_month fecha_hour fecha_minute fecha_second \n",
704
+ "135569 1.0 0.391304 0.525424 0.881356 \n",
705
+ "78656 1.0 0.347826 0.254237 0.288136 \n",
706
+ "87437 0.0 0.391304 0.050847 0.338983 \n",
707
+ "131674 0.0 0.782609 0.915254 0.101695 \n",
708
+ "45535 0.0 0.913043 0.406780 0.508475 \n",
709
+ "... ... ... ... ... \n",
710
+ "41993 0.0 0.826087 0.067797 0.762712 \n",
711
+ "97639 0.0 0.826087 0.169492 0.186441 \n",
712
+ "95939 1.0 0.869565 0.372881 0.847458 \n",
713
+ "117952 0.0 0.000000 0.406780 0.779661 \n",
714
+ "43567 1.0 0.913043 0.288136 0.305085 \n",
715
+ "\n",
716
+ "[135000 rows x 45 columns]"
717
+ ]
718
+ },
719
+ "execution_count": 7,
720
+ "metadata": {},
721
+ "output_type": "execute_result"
722
+ }
723
+ ],
724
+ "source": [
725
+ "X_train_transformed"
726
+ ]
727
+ },
728
+ {
729
+ "cell_type": "code",
730
+ "execution_count": 8,
731
+ "metadata": {},
732
+ "outputs": [],
733
+ "source": [
734
+ "sel = ProbeFeatureSelection(\n",
735
+ " estimator=RandomForestClassifier(),\n",
736
+ " scoring=\"roc_auc\",\n",
737
+ " n_probes=3,\n",
738
+ " distribution=\"all\",\n",
739
+ " cv=3,\n",
740
+ " random_state=150\n",
741
+ ")"
742
+ ]
743
+ },
744
+ {
745
+ "cell_type": "code",
746
+ "execution_count": 9,
747
+ "metadata": {},
748
+ "outputs": [],
749
+ "source": [
750
+ "X_tr = sel.fit_transform(X_train_transformed, y_train)"
751
+ ]
752
+ },
753
+ {
754
+ "cell_type": "code",
755
+ "execution_count": 10,
756
+ "metadata": {},
757
+ "outputs": [
758
+ {
759
+ "name": "stdout",
760
+ "output_type": "stream",
761
+ "text": [
762
+ "(135000, 45) (135000, 13)\n"
763
+ ]
764
+ }
765
+ ],
766
+ "source": [
767
+ "print(X_train_transformed.shape, X_tr.shape)\n"
768
+ ]
769
+ },
770
+ {
771
+ "cell_type": "code",
772
+ "execution_count": null,
773
+ "metadata": {},
774
+ "outputs": [],
775
+ "source": []
776
+ },
777
+ {
778
+ "cell_type": "code",
779
+ "execution_count": 11,
780
+ "metadata": {},
781
+ "outputs": [],
782
+ "source": [
783
+ "selected_features = X_tr.columns"
784
+ ]
785
+ },
786
+ {
787
+ "cell_type": "code",
788
+ "execution_count": 12,
789
+ "metadata": {},
790
+ "outputs": [],
791
+ "source": [
792
+ "pd.Series(selected_features).to_csv('../data/processed/selected_features.csv', index=False)"
793
+ ]
794
+ },
795
+ {
796
+ "cell_type": "code",
797
+ "execution_count": null,
798
+ "metadata": {},
799
+ "outputs": [],
800
+ "source": []
801
+ },
802
+ {
803
+ "cell_type": "code",
804
+ "execution_count": null,
805
+ "metadata": {},
806
+ "outputs": [],
807
+ "source": []
808
+ }
809
+ ],
810
+ "metadata": {
811
+ "kernelspec": {
812
+ "display_name": "fraud-detection",
813
+ "language": "python",
814
+ "name": "python3"
815
+ },
816
+ "language_info": {
817
+ "codemirror_mode": {
818
+ "name": "ipython",
819
+ "version": 3
820
+ },
821
+ "file_extension": ".py",
822
+ "mimetype": "text/x-python",
823
+ "name": "python",
824
+ "nbconvert_exporter": "python",
825
+ "pygments_lexer": "ipython3",
826
+ "version": "3.10.12"
827
+ },
828
+ "orig_nbformat": 4,
829
+ "vscode": {
830
+ "interpreter": {
831
+ "hash": "45e631c81adbf0cb55b2526738ae1a14c53cfa3f28a6ae1bee5619daf3ab935d"
832
+ }
833
+ }
834
+ },
835
+ "nbformat": 4,
836
+ "nbformat_minor": 2
837
+ }
notebooks/04-model _training.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/__pycache__/utils.cpython-310.pyc ADDED
Binary file (1.19 kB). View file
 
notebooks/logs.log ADDED
@@ -0,0 +1,808 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-10-08 15:23:18,818:WARNING:
2
+ 'cuml' is a soft dependency and not included in the pycaret installation. Please run: `pip install cuml` to install.
3
+ 2023-10-08 15:23:18,819:WARNING:
4
+ 'cuml' is a soft dependency and not included in the pycaret installation. Please run: `pip install cuml` to install.
5
+ 2023-10-08 15:23:18,819:WARNING:
6
+ 'cuml' is a soft dependency and not included in the pycaret installation. Please run: `pip install cuml` to install.
7
+ 2023-10-08 15:23:18,819:WARNING:
8
+ 'cuml' is a soft dependency and not included in the pycaret installation. Please run: `pip install cuml` to install.
9
+ 2023-10-08 15:38:13,449:WARNING:
10
+ 'cuml' is a soft dependency and not included in the pycaret installation. Please run: `pip install cuml` to install.
11
+ 2023-10-08 15:38:13,449:WARNING:
12
+ 'cuml' is a soft dependency and not included in the pycaret installation. Please run: `pip install cuml` to install.
13
+ 2023-10-08 15:38:13,449:WARNING:
14
+ 'cuml' is a soft dependency and not included in the pycaret installation. Please run: `pip install cuml` to install.
15
+ 2023-10-08 15:38:13,449:WARNING:
16
+ 'cuml' is a soft dependency and not included in the pycaret installation. Please run: `pip install cuml` to install.
17
+ 2023-10-08 15:40:39,103:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
18
+ STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
19
+
20
+ Increase the number of iterations (max_iter) or scale the data as shown in:
21
+ https://scikit-learn.org/stable/modules/preprocessing.html
22
+ Please also refer to the documentation for alternative solver options:
23
+ https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
24
+ n_iter_i = _check_optimize_result(
25
+
26
+ 2023-10-08 15:40:41,463:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
27
+ STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
28
+
29
+ Increase the number of iterations (max_iter) or scale the data as shown in:
30
+ https://scikit-learn.org/stable/modules/preprocessing.html
31
+ Please also refer to the documentation for alternative solver options:
32
+ https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
33
+ n_iter_i = _check_optimize_result(
34
+
35
+ 2023-10-08 15:40:43,785:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
36
+ STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
37
+
38
+ Increase the number of iterations (max_iter) or scale the data as shown in:
39
+ https://scikit-learn.org/stable/modules/preprocessing.html
40
+ Please also refer to the documentation for alternative solver options:
41
+ https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
42
+ n_iter_i = _check_optimize_result(
43
+
44
+ 2023-10-08 15:40:46,764:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
45
+ STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
46
+
47
+ Increase the number of iterations (max_iter) or scale the data as shown in:
48
+ https://scikit-learn.org/stable/modules/preprocessing.html
49
+ Please also refer to the documentation for alternative solver options:
50
+ https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
51
+ n_iter_i = _check_optimize_result(
52
+
53
+ 2023-10-08 15:40:48,451:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
54
+ STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
55
+
56
+ Increase the number of iterations (max_iter) or scale the data as shown in:
57
+ https://scikit-learn.org/stable/modules/preprocessing.html
58
+ Please also refer to the documentation for alternative solver options:
59
+ https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
60
+ n_iter_i = _check_optimize_result(
61
+
62
+ 2023-10-08 15:40:51,170:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
63
+ STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
64
+
65
+ Increase the number of iterations (max_iter) or scale the data as shown in:
66
+ https://scikit-learn.org/stable/modules/preprocessing.html
67
+ Please also refer to the documentation for alternative solver options:
68
+ https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
69
+ n_iter_i = _check_optimize_result(
70
+
71
+ 2023-10-08 15:40:53,845:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
72
+ STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
73
+
74
+ Increase the number of iterations (max_iter) or scale the data as shown in:
75
+ https://scikit-learn.org/stable/modules/preprocessing.html
76
+ Please also refer to the documentation for alternative solver options:
77
+ https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
78
+ n_iter_i = _check_optimize_result(
79
+
80
+ 2023-10-08 15:40:56,184:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
81
+ STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
82
+
83
+ Increase the number of iterations (max_iter) or scale the data as shown in:
84
+ https://scikit-learn.org/stable/modules/preprocessing.html
85
+ Please also refer to the documentation for alternative solver options:
86
+ https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
87
+ n_iter_i = _check_optimize_result(
88
+
89
+ 2023-10-08 15:40:59,289:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
90
+ STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
91
+
92
+ Increase the number of iterations (max_iter) or scale the data as shown in:
93
+ https://scikit-learn.org/stable/modules/preprocessing.html
94
+ Please also refer to the documentation for alternative solver options:
95
+ https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
96
+ n_iter_i = _check_optimize_result(
97
+
98
+ 2023-10-08 15:41:02,358:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
99
+ STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
100
+
101
+ Increase the number of iterations (max_iter) or scale the data as shown in:
102
+ https://scikit-learn.org/stable/modules/preprocessing.html
103
+ Please also refer to the documentation for alternative solver options:
104
+ https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
105
+ n_iter_i = _check_optimize_result(
106
+
107
+ 2023-10-08 15:41:04,033:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
108
+ STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
109
+
110
+ Increase the number of iterations (max_iter) or scale the data as shown in:
111
+ https://scikit-learn.org/stable/modules/preprocessing.html
112
+ Please also refer to the documentation for alternative solver options:
113
+ https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
114
+ n_iter_i = _check_optimize_result(
115
+
116
+ 2023-10-08 15:49:53,461:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
117
+ if is_sparse(dtype):
118
+
119
+ 2023-10-08 15:49:53,461:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
120
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
121
+
122
+ 2023-10-08 15:49:53,462:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
123
+ if is_categorical_dtype(dtype):
124
+
125
+ 2023-10-08 15:49:53,463:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
126
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
127
+
128
+ 2023-10-08 15:51:35,340:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
129
+ if is_sparse(dtype):
130
+
131
+ 2023-10-08 15:51:35,340:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
132
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
133
+
134
+ 2023-10-08 15:51:35,341:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
135
+ if is_categorical_dtype(dtype):
136
+
137
+ 2023-10-08 15:51:35,342:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
138
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
139
+
140
+ 2023-10-08 15:51:35,411:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
141
+ if is_sparse(dtype):
142
+
143
+ 2023-10-08 15:51:35,411:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
144
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
145
+
146
+ 2023-10-08 15:51:35,413:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
147
+ if is_categorical_dtype(dtype):
148
+
149
+ 2023-10-08 15:51:35,413:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
150
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
151
+
152
+ 2023-10-08 15:51:35,498:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
153
+ if is_sparse(dtype):
154
+
155
+ 2023-10-08 15:51:35,498:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
156
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
157
+
158
+ 2023-10-08 15:51:35,501:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
159
+ if is_categorical_dtype(dtype):
160
+
161
+ 2023-10-08 15:51:35,501:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
162
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
163
+
164
+ 2023-10-08 15:53:16,848:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
165
+ if is_sparse(dtype):
166
+
167
+ 2023-10-08 15:53:16,848:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
168
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
169
+
170
+ 2023-10-08 15:53:16,849:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
171
+ if is_categorical_dtype(dtype):
172
+
173
+ 2023-10-08 15:53:16,849:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
174
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
175
+
176
+ 2023-10-08 15:53:16,922:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
177
+ if is_sparse(dtype):
178
+
179
+ 2023-10-08 15:53:16,922:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
180
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
181
+
182
+ 2023-10-08 15:53:16,923:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
183
+ if is_categorical_dtype(dtype):
184
+
185
+ 2023-10-08 15:53:16,923:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
186
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
187
+
188
+ 2023-10-08 15:53:17,013:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
189
+ if is_sparse(dtype):
190
+
191
+ 2023-10-08 15:53:17,013:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
192
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
193
+
194
+ 2023-10-08 15:53:17,014:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
195
+ if is_categorical_dtype(dtype):
196
+
197
+ 2023-10-08 15:53:17,014:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
198
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
199
+
200
+ 2023-10-08 15:54:59,320:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
201
+ if is_sparse(dtype):
202
+
203
+ 2023-10-08 15:54:59,320:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
204
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
205
+
206
+ 2023-10-08 15:54:59,321:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
207
+ if is_categorical_dtype(dtype):
208
+
209
+ 2023-10-08 15:54:59,321:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
210
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
211
+
212
+ 2023-10-08 15:54:59,383:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
213
+ if is_sparse(dtype):
214
+
215
+ 2023-10-08 15:54:59,383:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
216
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
217
+
218
+ 2023-10-08 15:54:59,384:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
219
+ if is_categorical_dtype(dtype):
220
+
221
+ 2023-10-08 15:54:59,385:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
222
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
223
+
224
+ 2023-10-08 15:54:59,453:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
225
+ if is_sparse(dtype):
226
+
227
+ 2023-10-08 15:54:59,453:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
228
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
229
+
230
+ 2023-10-08 15:54:59,454:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
231
+ if is_categorical_dtype(dtype):
232
+
233
+ 2023-10-08 15:54:59,454:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
234
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
235
+
236
+ 2023-10-08 15:56:41,633:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
237
+ if is_sparse(dtype):
238
+
239
+ 2023-10-08 15:56:41,633:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
240
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
241
+
242
+ 2023-10-08 15:56:41,635:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
243
+ if is_categorical_dtype(dtype):
244
+
245
+ 2023-10-08 15:56:41,635:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
246
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
247
+
248
+ 2023-10-08 15:56:41,710:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
249
+ if is_sparse(dtype):
250
+
251
+ 2023-10-08 15:56:41,710:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
252
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
253
+
254
+ 2023-10-08 15:56:41,712:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
255
+ if is_categorical_dtype(dtype):
256
+
257
+ 2023-10-08 15:56:41,712:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
258
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
259
+
260
+ 2023-10-08 15:56:41,800:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
261
+ if is_sparse(dtype):
262
+
263
+ 2023-10-08 15:56:41,800:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
264
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
265
+
266
+ 2023-10-08 15:56:41,802:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
267
+ if is_categorical_dtype(dtype):
268
+
269
+ 2023-10-08 15:56:41,802:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
270
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
271
+
272
+ 2023-10-08 15:58:23,396:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
273
+ if is_sparse(dtype):
274
+
275
+ 2023-10-08 15:58:23,396:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
276
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
277
+
278
+ 2023-10-08 15:58:23,398:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
279
+ if is_categorical_dtype(dtype):
280
+
281
+ 2023-10-08 15:58:23,398:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
282
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
283
+
284
+ 2023-10-08 15:58:23,460:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
285
+ if is_sparse(dtype):
286
+
287
+ 2023-10-08 15:58:23,460:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
288
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
289
+
290
+ 2023-10-08 15:58:23,461:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
291
+ if is_categorical_dtype(dtype):
292
+
293
+ 2023-10-08 15:58:23,462:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
294
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
295
+
296
+ 2023-10-08 15:58:23,549:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
297
+ if is_sparse(dtype):
298
+
299
+ 2023-10-08 15:58:23,549:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
300
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
301
+
302
+ 2023-10-08 15:58:23,551:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
303
+ if is_categorical_dtype(dtype):
304
+
305
+ 2023-10-08 15:58:23,551:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
306
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
307
+
308
+ 2023-10-08 16:00:04,973:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
309
+ if is_sparse(dtype):
310
+
311
+ 2023-10-08 16:00:04,973:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
312
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
313
+
314
+ 2023-10-08 16:00:04,975:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
315
+ if is_categorical_dtype(dtype):
316
+
317
+ 2023-10-08 16:00:04,975:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
318
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
319
+
320
+ 2023-10-08 16:00:05,073:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
321
+ if is_sparse(dtype):
322
+
323
+ 2023-10-08 16:00:05,073:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
324
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
325
+
326
+ 2023-10-08 16:00:05,075:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
327
+ if is_categorical_dtype(dtype):
328
+
329
+ 2023-10-08 16:00:05,075:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
330
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
331
+
332
+ 2023-10-08 16:00:05,163:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
333
+ if is_sparse(dtype):
334
+
335
+ 2023-10-08 16:00:05,163:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
336
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
337
+
338
+ 2023-10-08 16:00:05,165:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
339
+ if is_categorical_dtype(dtype):
340
+
341
+ 2023-10-08 16:00:05,165:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
342
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
343
+
344
+ 2023-10-08 16:01:42,577:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
345
+ if is_sparse(dtype):
346
+
347
+ 2023-10-08 16:01:42,577:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
348
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
349
+
350
+ 2023-10-08 16:01:42,579:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
351
+ if is_categorical_dtype(dtype):
352
+
353
+ 2023-10-08 16:01:42,579:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
354
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
355
+
356
+ 2023-10-08 16:01:42,643:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
357
+ if is_sparse(dtype):
358
+
359
+ 2023-10-08 16:01:42,643:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
360
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
361
+
362
+ 2023-10-08 16:01:42,645:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
363
+ if is_categorical_dtype(dtype):
364
+
365
+ 2023-10-08 16:01:42,645:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
366
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
367
+
368
+ 2023-10-08 16:01:42,725:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
369
+ if is_sparse(dtype):
370
+
371
+ 2023-10-08 16:01:42,725:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
372
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
373
+
374
+ 2023-10-08 16:01:42,726:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
375
+ if is_categorical_dtype(dtype):
376
+
377
+ 2023-10-08 16:01:42,726:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
378
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
379
+
380
+ 2023-10-08 16:03:23,976:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
381
+ if is_sparse(dtype):
382
+
383
+ 2023-10-08 16:03:23,977:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
384
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
385
+
386
+ 2023-10-08 16:03:23,978:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
387
+ if is_categorical_dtype(dtype):
388
+
389
+ 2023-10-08 16:03:23,979:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
390
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
391
+
392
+ 2023-10-08 16:03:24,050:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
393
+ if is_sparse(dtype):
394
+
395
+ 2023-10-08 16:03:24,050:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
396
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
397
+
398
+ 2023-10-08 16:03:24,051:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
399
+ if is_categorical_dtype(dtype):
400
+
401
+ 2023-10-08 16:03:24,051:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
402
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
403
+
404
+ 2023-10-08 16:03:24,153:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
405
+ if is_sparse(dtype):
406
+
407
+ 2023-10-08 16:03:24,153:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
408
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
409
+
410
+ 2023-10-08 16:03:24,154:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
411
+ if is_categorical_dtype(dtype):
412
+
413
+ 2023-10-08 16:03:24,154:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
414
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
415
+
416
+ 2023-10-08 16:05:05,773:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
417
+ if is_sparse(dtype):
418
+
419
+ 2023-10-08 16:05:05,773:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
420
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
421
+
422
+ 2023-10-08 16:05:05,775:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
423
+ if is_categorical_dtype(dtype):
424
+
425
+ 2023-10-08 16:05:05,775:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
426
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
427
+
428
+ 2023-10-08 16:05:05,839:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
429
+ if is_sparse(dtype):
430
+
431
+ 2023-10-08 16:05:05,839:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
432
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
433
+
434
+ 2023-10-08 16:05:05,841:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
435
+ if is_categorical_dtype(dtype):
436
+
437
+ 2023-10-08 16:05:05,841:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
438
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
439
+
440
+ 2023-10-08 16:05:05,917:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
441
+ if is_sparse(dtype):
442
+
443
+ 2023-10-08 16:05:05,918:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
444
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
445
+
446
+ 2023-10-08 16:05:05,919:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
447
+ if is_categorical_dtype(dtype):
448
+
449
+ 2023-10-08 16:05:05,919:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
450
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
451
+
452
+ 2023-10-08 16:06:47,939:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
453
+ if is_sparse(dtype):
454
+
455
+ 2023-10-08 16:06:47,940:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
456
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
457
+
458
+ 2023-10-08 16:06:47,941:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
459
+ if is_categorical_dtype(dtype):
460
+
461
+ 2023-10-08 16:06:47,941:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
462
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
463
+
464
+ 2023-10-08 16:06:47,995:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
465
+ if is_sparse(dtype):
466
+
467
+ 2023-10-08 16:06:47,995:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
468
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
469
+
470
+ 2023-10-08 16:06:47,996:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
471
+ if is_categorical_dtype(dtype):
472
+
473
+ 2023-10-08 16:06:47,996:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
474
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
475
+
476
+ 2023-10-08 16:06:48,040:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
477
+ if is_sparse(dtype):
478
+
479
+ 2023-10-08 16:06:48,040:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
480
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
481
+
482
+ 2023-10-08 16:06:48,041:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
483
+ if is_categorical_dtype(dtype):
484
+
485
+ 2023-10-08 16:06:48,042:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
486
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
487
+
488
+ 2023-10-08 16:08:29,796:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
489
+ if is_sparse(dtype):
490
+
491
+ 2023-10-08 16:08:29,796:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
492
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
493
+
494
+ 2023-10-08 16:08:29,798:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
495
+ if is_categorical_dtype(dtype):
496
+
497
+ 2023-10-08 16:08:29,798:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
498
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
499
+
500
+ 2023-10-08 17:48:44,232:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
501
+ STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
502
+
503
+ Increase the number of iterations (max_iter) or scale the data as shown in:
504
+ https://scikit-learn.org/stable/modules/preprocessing.html
505
+ Please also refer to the documentation for alternative solver options:
506
+ https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
507
+ n_iter_i = _check_optimize_result(
508
+
509
+ 2023-10-08 17:48:46,174:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
510
+ STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
511
+
512
+ Increase the number of iterations (max_iter) or scale the data as shown in:
513
+ https://scikit-learn.org/stable/modules/preprocessing.html
514
+ Please also refer to the documentation for alternative solver options:
515
+ https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
516
+ n_iter_i = _check_optimize_result(
517
+
518
+ 2023-10-08 17:48:48,326:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
519
+ STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
520
+
521
+ Increase the number of iterations (max_iter) or scale the data as shown in:
522
+ https://scikit-learn.org/stable/modules/preprocessing.html
523
+ Please also refer to the documentation for alternative solver options:
524
+ https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
525
+ n_iter_i = _check_optimize_result(
526
+
527
+ 2023-10-08 17:48:49,750:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
528
+ STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
529
+
530
+ Increase the number of iterations (max_iter) or scale the data as shown in:
531
+ https://scikit-learn.org/stable/modules/preprocessing.html
532
+ Please also refer to the documentation for alternative solver options:
533
+ https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
534
+ n_iter_i = _check_optimize_result(
535
+
536
+ 2023-10-08 17:48:52,626:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
537
+ STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
538
+
539
+ Increase the number of iterations (max_iter) or scale the data as shown in:
540
+ https://scikit-learn.org/stable/modules/preprocessing.html
541
+ Please also refer to the documentation for alternative solver options:
542
+ https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
543
+ n_iter_i = _check_optimize_result(
544
+
545
+ 2023-10-08 17:48:55,008:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
546
+ STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
547
+
548
+ Increase the number of iterations (max_iter) or scale the data as shown in:
549
+ https://scikit-learn.org/stable/modules/preprocessing.html
550
+ Please also refer to the documentation for alternative solver options:
551
+ https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
552
+ n_iter_i = _check_optimize_result(
553
+
554
+ 2023-10-08 17:54:25,847:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
555
+ if is_sparse(dtype):
556
+
557
+ 2023-10-08 17:54:25,847:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
558
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
559
+
560
+ 2023-10-08 17:54:25,848:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
561
+ if is_categorical_dtype(dtype):
562
+
563
+ 2023-10-08 17:54:25,848:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
564
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
565
+
566
+ 2023-10-08 17:55:54,180:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
567
+ if is_sparse(dtype):
568
+
569
+ 2023-10-08 17:55:54,181:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
570
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
571
+
572
+ 2023-10-08 17:55:54,182:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
573
+ if is_categorical_dtype(dtype):
574
+
575
+ 2023-10-08 17:55:54,183:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
576
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
577
+
578
+ 2023-10-08 17:55:54,249:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
579
+ if is_sparse(dtype):
580
+
581
+ 2023-10-08 17:55:54,250:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
582
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
583
+
584
+ 2023-10-08 17:55:54,251:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
585
+ if is_categorical_dtype(dtype):
586
+
587
+ 2023-10-08 17:55:54,251:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
588
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
589
+
590
+ 2023-10-08 17:55:54,332:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
591
+ if is_sparse(dtype):
592
+
593
+ 2023-10-08 17:55:54,332:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
594
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
595
+
596
+ 2023-10-08 17:55:54,334:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
597
+ if is_categorical_dtype(dtype):
598
+
599
+ 2023-10-08 17:55:54,334:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
600
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
601
+
602
+ 2023-10-08 17:57:34,908:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
603
+ if is_sparse(dtype):
604
+
605
+ 2023-10-08 17:57:34,908:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
606
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
607
+
608
+ 2023-10-08 17:57:34,909:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
609
+ if is_categorical_dtype(dtype):
610
+
611
+ 2023-10-08 17:57:34,909:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
612
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
613
+
614
+ 2023-10-08 17:57:34,982:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
615
+ if is_sparse(dtype):
616
+
617
+ 2023-10-08 17:57:34,982:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
618
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
619
+
620
+ 2023-10-08 17:57:34,983:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
621
+ if is_categorical_dtype(dtype):
622
+
623
+ 2023-10-08 17:57:34,983:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
624
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
625
+
626
+ 2023-10-08 17:57:35,076:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
627
+ if is_sparse(dtype):
628
+
629
+ 2023-10-08 17:57:35,076:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
630
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
631
+
632
+ 2023-10-08 17:57:35,078:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
633
+ if is_categorical_dtype(dtype):
634
+
635
+ 2023-10-08 17:57:35,078:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
636
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
637
+
638
+ 2023-10-08 17:59:19,597:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
639
+ if is_sparse(dtype):
640
+
641
+ 2023-10-08 17:59:19,598:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
642
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
643
+
644
+ 2023-10-08 17:59:19,600:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
645
+ if is_categorical_dtype(dtype):
646
+
647
+ 2023-10-08 17:59:19,601:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
648
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
649
+
650
+ 2023-10-08 17:59:19,692:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
651
+ if is_sparse(dtype):
652
+
653
+ 2023-10-08 17:59:19,692:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
654
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
655
+
656
+ 2023-10-08 17:59:19,695:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
657
+ if is_categorical_dtype(dtype):
658
+
659
+ 2023-10-08 17:59:19,695:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
660
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
661
+
662
+ 2023-10-08 17:59:19,800:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
663
+ if is_sparse(dtype):
664
+
665
+ 2023-10-08 17:59:19,800:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
666
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
667
+
668
+ 2023-10-08 17:59:19,801:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
669
+ if is_categorical_dtype(dtype):
670
+
671
+ 2023-10-08 17:59:19,801:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
672
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
673
+
674
+ 2023-10-08 18:01:03,749:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
675
+ if is_sparse(dtype):
676
+
677
+ 2023-10-08 18:01:03,749:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
678
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
679
+
680
+ 2023-10-08 18:01:03,750:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
681
+ if is_categorical_dtype(dtype):
682
+
683
+ 2023-10-08 18:01:03,751:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
684
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
685
+
686
+ 2023-10-08 18:01:03,849:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
687
+ if is_sparse(dtype):
688
+
689
+ 2023-10-08 18:01:03,850:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
690
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
691
+
692
+ 2023-10-08 18:01:03,851:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
693
+ if is_categorical_dtype(dtype):
694
+
695
+ 2023-10-08 18:01:03,851:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
696
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
697
+
698
+ 2023-10-08 18:01:03,972:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
699
+ if is_sparse(dtype):
700
+
701
+ 2023-10-08 18:01:03,972:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
702
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
703
+
704
+ 2023-10-08 18:01:03,974:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
705
+ if is_categorical_dtype(dtype):
706
+
707
+ 2023-10-08 18:01:03,974:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
708
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
709
+
710
+ 2023-10-08 18:02:46,388:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
711
+ if is_sparse(dtype):
712
+
713
+ 2023-10-08 18:02:46,388:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
714
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
715
+
716
+ 2023-10-08 18:02:46,389:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
717
+ if is_categorical_dtype(dtype):
718
+
719
+ 2023-10-08 18:02:46,389:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
720
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
721
+
722
+ 2023-10-08 18:02:46,460:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
723
+ if is_sparse(dtype):
724
+
725
+ 2023-10-08 18:02:46,460:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
726
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
727
+
728
+ 2023-10-08 18:02:46,461:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
729
+ if is_categorical_dtype(dtype):
730
+
731
+ 2023-10-08 18:02:46,461:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
732
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
733
+
734
+ 2023-10-08 18:02:46,520:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
735
+ if is_sparse(dtype):
736
+
737
+ 2023-10-08 18:02:46,520:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
738
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
739
+
740
+ 2023-10-08 18:02:46,521:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
741
+ if is_categorical_dtype(dtype):
742
+
743
+ 2023-10-08 18:02:46,521:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
744
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
745
+
746
+ 2023-10-08 18:04:29,787:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
747
+ if is_sparse(dtype):
748
+
749
+ 2023-10-08 18:04:29,788:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
750
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
751
+
752
+ 2023-10-08 18:04:29,789:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
753
+ if is_categorical_dtype(dtype):
754
+
755
+ 2023-10-08 18:04:29,789:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
756
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
757
+
758
+ 2023-10-08 18:23:23,859:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
759
+ if is_sparse(dtype):
760
+
761
+ 2023-10-08 18:23:23,860:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
762
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
763
+
764
+ 2023-10-08 18:23:23,861:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
765
+ if is_categorical_dtype(dtype):
766
+
767
+ 2023-10-08 18:23:23,861:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
768
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
769
+
770
+ 2023-10-08 18:23:23,863:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:520: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
771
+ if is_sparse(data):
772
+
773
+ 2023-10-08 18:25:09,016:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
774
+ if is_sparse(dtype):
775
+
776
+ 2023-10-08 18:25:09,016:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
777
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
778
+
779
+ 2023-10-08 18:25:09,019:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
780
+ if is_categorical_dtype(dtype):
781
+
782
+ 2023-10-08 18:25:09,019:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
783
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
784
+
785
+ 2023-10-08 20:08:20,046:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
786
+ if is_sparse(dtype):
787
+
788
+ 2023-10-08 20:08:20,055:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
789
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
790
+
791
+ 2023-10-08 20:08:20,057:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
792
+ if is_categorical_dtype(dtype):
793
+
794
+ 2023-10-08 20:08:20,058:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
795
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
796
+
797
+ 2023-10-08 20:08:26,350:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
798
+ if is_sparse(dtype):
799
+
800
+ 2023-10-08 20:08:26,351:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
801
+ is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
802
+
803
+ 2023-10-08 20:08:26,352:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
804
+ if is_categorical_dtype(dtype):
805
+
806
+ 2023-10-08 20:08:26,352:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
807
+ return is_int or is_bool or is_float or is_categorical_dtype(dtype)
808
+
notebooks/utils.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sklearn.base import BaseEstimator, TransformerMixin
2
+ from sklearn.preprocessing import MinMaxScaler, StandardScaler
3
+ import pandas as pd
4
+
5
class ScalerDf(BaseEstimator, TransformerMixin):
    """Scale a DataFrame and return a DataFrame with the same labels.

    The wrapped scikit-learn scaler returns a bare array; this transformer
    re-wraps the result so the original column names and index survive.

    Parameters
    ----------
    method : str
        ``'minmax'`` uses ``MinMaxScaler``, ``'standard'`` uses
        ``StandardScaler`` and ``'none'`` leaves the data unscaled.
    """

    def __init__(self, method):
        self.method = method

    def fit(self, X, y=None):
        """Fit the scaler selected by ``method`` on ``X``.

        ``y`` is accepted for pipeline compatibility and ignored.

        Raises
        ------
        ValueError
            If ``method`` is not one of the supported names.
        """
        if self.method == 'minmax':
            self.scaler = MinMaxScaler()
        elif self.method == 'standard':
            self.scaler = StandardScaler()
        elif self.method == 'none':
            # Nothing to learn. Reset any scaler left over from an earlier
            # fit so a refit with 'none' cannot reuse stale statistics.
            self.scaler = None
            return self
        else:
            raise ValueError("Invalid scaling method. Supported methods are 'minmax', 'standard', and 'none'.")

        self.scaler.fit(X)
        return self

    def transform(self, X):
        """Return ``X`` scaled, as a DataFrame with ``X``'s columns and index.

        With ``method='none'`` an unscaled copy of ``X`` is returned.
        """
        if self.method == 'none':
            # Checked via ``method`` (not ``scaler``) so instances fitted and
            # pickled before the pass-through fix, which never had a
            # ``scaler`` attribute, also work.
            return X.copy()

        return pd.DataFrame(
            self.scaler.transform(X),
            columns=X.columns,
            index=X.index,
        )
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ ## python 3.10
2
+ pandas==2.1.1
3
+ scikit-learn==1.3.1
4
+ feature_engine==1.6.2
5
+ xgboost==2.0.0
6
+ gradio==3.35.2
src/__pycache__/utils.cpython-310.pyc ADDED
Binary file (1.19 kB). View file
 
src/app.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import joblib
4
+ import numpy as np
5
+ import json
6
+
7
# Columns kept from the challenge dataset.
ls = [
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'o', 'p',
    'fecha', 'monto', 'score',
]

# Challenge dataset, restricted to the columns listed above.
data = pd.read_csv(
    'data/MercadoLibre Data Scientist Technical Challenge - Dataset.csv'
)[ls]

# NOTE: joblib.load unpickles the file, so it must come from a trusted source.
pipeline = joblib.load('models/final_pipeline.joblib')
11
def sentence_builder(a, b, c, d, e, f, g, h, j, k, l, m, n, o, p, fecha, monto, score):
    """Score a single transaction and return the result as a JSON string.

    The eighteen arguments are the raw feature values of one transaction.
    They are assembled into a one-row DataFrame, cast to the dtypes listed
    below, and passed to the module-level ``pipeline``.

    Returns
    -------
    str
        JSON object with ``probability`` (positive-class probability, as a
        string) and ``prediction`` (``'Fraude'`` or ``'No fraude'``).
    """
    # Probabilities below this cut-off are labelled 'No fraude'.
    fraud_threshold = 0.05018921

    # Column name -> dtype handed to the pipeline; insertion order matches
    # the positional order of the arguments.
    column_dtypes = {
        'a': 'int64',
        'b': 'float64',
        'c': 'float64',
        'd': 'float64',
        'e': 'float64',
        'f': 'float64',
        'g': 'object',
        'h': 'int64',
        'j': 'object',
        'k': 'float64',
        'l': 'float64',
        'm': 'float64',
        'n': 'int64',
        'o': 'object',
        'p': 'object',
        'fecha': 'object',
        'monto': 'float64',
        'score': 'int64',
    }
    row = [a, b, c, d, e, f, g, h, j, k, l, m, n, o, p, fecha, monto, score]

    # Build the single-row frame and cast every column in one pass.
    df = pd.DataFrame([row], columns=list(column_dtypes)).astype(column_dtypes)

    predict_proba = pipeline.predict_proba(df)[:, 1]
    predict = np.where(predict_proba < fraud_threshold, 'No fraude', 'Fraude')

    output = {
        'probability': str(predict_proba[0]),
        'prediction': predict[0],
    }
    return json.dumps(output)
42
+
43
+
44
# One input widget per sentence_builder argument, in positional order.
# Each entry is (component class, label, default value).
demo = gr.Interface(
    fn=sentence_builder,
    inputs=[
        component(value=default, label=label)
        for component, label, default in (
            (gr.Number, "a", 4),
            (gr.Number, "b", 0.5217),
            (gr.Number, "c", 17889.0),
            (gr.Number, "d", 1.0),
            (gr.Number, "e", 0.2830350998),
            (gr.Number, "f", 12.0),
            (gr.Textbox, "g", "BR"),
            (gr.Number, "h", 36),
            (gr.Textbox, "j", "cat_4744ece"),
            (gr.Number, "k", 0.6366103624),
            (gr.Number, "l", 2470.0),
            (gr.Number, "m", 308.0),
            (gr.Number, "n", 1),
            (gr.Textbox, "o", "Y"),
            (gr.Textbox, "p", "Y"),
            (gr.Textbox, "fecha", "2020-03-18 09:31:52"),
            (gr.Number, "monto", 24.89),
            (gr.Number, "score", 93),
        )
    ],
    outputs="json",
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
src/utils.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sklearn.base import BaseEstimator, TransformerMixin
2
+ from sklearn.preprocessing import MinMaxScaler, StandardScaler
3
+ import pandas as pd
4
+
5
class ScalerDf(BaseEstimator, TransformerMixin):
    """Scale a DataFrame and return a DataFrame with the same labels.

    The wrapped scikit-learn scaler returns a bare array; this transformer
    re-wraps the result so the original column names and index survive.

    Parameters
    ----------
    method : str
        ``'minmax'`` uses ``MinMaxScaler``, ``'standard'`` uses
        ``StandardScaler`` and ``'none'`` leaves the data unscaled.
    """

    def __init__(self, method):
        self.method = method

    def fit(self, X, y=None):
        """Fit the scaler selected by ``method`` on ``X``.

        ``y`` is accepted for pipeline compatibility and ignored.

        Raises
        ------
        ValueError
            If ``method`` is not one of the supported names.
        """
        if self.method == 'minmax':
            self.scaler = MinMaxScaler()
        elif self.method == 'standard':
            self.scaler = StandardScaler()
        elif self.method == 'none':
            # Nothing to learn. Reset any scaler left over from an earlier
            # fit so a refit with 'none' cannot reuse stale statistics.
            self.scaler = None
            return self
        else:
            raise ValueError("Invalid scaling method. Supported methods are 'minmax', 'standard', and 'none'.")

        self.scaler.fit(X)
        return self

    def transform(self, X):
        """Return ``X`` scaled, as a DataFrame with ``X``'s columns and index.

        With ``method='none'`` an unscaled copy of ``X`` is returned.
        """
        if self.method == 'none':
            # Checked via ``method`` (not ``scaler``) so instances fitted and
            # pickled before the pass-through fix, which never had a
            # ``scaler`` attribute, also work.
            return X.copy()

        return pd.DataFrame(
            self.scaler.transform(X),
            columns=X.columns,
            index=X.index,
        )