Spaces:

wilmars
/

fraud-meli-app

Running

App Files Files Community

wilmars committed on Oct 9, 2023

Commit

6b8bdc1

•

1 Parent(s): 23fec3c

Upload 16 files

Browse files

Files changed (17) hide show

.gitattributes +1 -0
README.md +5 -7
data/MercadoLibre Data Scientist Technical Challenge - Dataset.csv +3 -0
data/processed/selected_features.csv +14 -0
models/feature_engineering_pipeline.joblib +3 -0
models/final_pipeline.joblib +3 -0
notebooks/01-eda.ipynb +0 -0
notebooks/02-feature_rngineering.ipynb +1293 -0
notebooks/03-feature_selection.ipynb +837 -0
notebooks/04-model _training.ipynb +0 -0
notebooks/__pycache__/utils.cpython-310.pyc +0 -0
notebooks/logs.log +808 -0
notebooks/utils.py +29 -0
requirements.txt +6 -0
src/__pycache__/utils.cpython-310.pyc +0 -0
src/app.py +70 -0
src/utils.py +29 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+data/MercadoLibre[[:space:]]Data[[:space:]]Scientist[[:space:]]Technical[[:space:]]Challenge[[:space:]]-[[:space:]]Dataset.csv filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,13 +1,11 @@
 ---
-title: Fraud Meli App
-emoji: 📈
-colorFrom: yellow
 colorTo: gray
 sdk: gradio
-sdk_version: 3.47.1
-app_file: app.py
 pinned: false
 license: mit
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Fraud App - MELI
+emoji: 🌍
+colorFrom: green
 colorTo: gray
 sdk: gradio
+sdk_version: 3.35.2
+app_file: src/app.py
 pinned: false
 license: mit
 ---

data/MercadoLibre Data Scientist Technical Challenge - Dataset.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9bfee87615b97787cfdbfe1798ec56d2de52a05d73c818bc836fa2168440c8ab
+size 18390780

data/processed/selected_features.csv ADDED Viewed

	@@ -0,0 +1,14 @@

+0
+b
+c
+h
+j
+k
+l
+m
+o
+monto
+score
+fecha_hour
+fecha_minute
+fecha_second

models/feature_engineering_pipeline.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:501863a278dd5e6cf3597123afbcacd4ed25eca8a27cf3259f3e7989236f7fdf
+size 9157

models/final_pipeline.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a5ccc8788d5fa15337b2ee31b0c95e6c86cc7da346c5b305d4c4b089725c7d1b
+size 433034

notebooks/01-eda.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

notebooks/02-feature_rngineering.ipynb ADDED Viewed

	@@ -0,0 +1,1293 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "import pandas as pd\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from feature_engine.imputation import AddMissingIndicator, MeanMedianImputer, CategoricalImputer\n",
+    "from feature_engine.transformation import LogTransformer\n",
+    "from feature_engine.discretisation import ArbitraryDiscretiser\n",
+    "from feature_engine.encoding import RareLabelEncoder, OrdinalEncoder\n",
+    "from feature_engine.datetime import DatetimeFeatures\n",
+    "from utils import ScalerDf\n",
+    "from sklearn.pipeline import Pipeline\n",
+    "import joblib\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(150000, 19)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>a</th>\n",
+       "      <th>b</th>\n",
+       "      <th>c</th>\n",
+       "      <th>d</th>\n",
+       "      <th>e</th>\n",
+       "      <th>f</th>\n",
+       "      <th>g</th>\n",
+       "      <th>h</th>\n",
+       "      <th>j</th>\n",
+       "      <th>k</th>\n",
+       "      <th>l</th>\n",
+       "      <th>m</th>\n",
+       "      <th>n</th>\n",
+       "      <th>o</th>\n",
+       "      <th>p</th>\n",
+       "      <th>fecha</th>\n",
+       "      <th>monto</th>\n",
+       "      <th>score</th>\n",
+       "      <th>fraude</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>4</td>\n",
+       "      <td>0.6812</td>\n",
+       "      <td>50084.12</td>\n",
+       "      <td>50.0</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>20.0</td>\n",
+       "      <td>AR</td>\n",
+       "      <td>1</td>\n",
+       "      <td>cat_d26ab52</td>\n",
+       "      <td>0.365475</td>\n",
+       "      <td>2479.0</td>\n",
+       "      <td>952.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>2020-03-20 09:28:19</td>\n",
+       "      <td>57.63</td>\n",
+       "      <td>100</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>4</td>\n",
+       "      <td>0.6694</td>\n",
+       "      <td>66005.49</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>AR</td>\n",
+       "      <td>1</td>\n",
+       "      <td>cat_ea962fb</td>\n",
+       "      <td>0.612728</td>\n",
+       "      <td>2603.0</td>\n",
+       "      <td>105.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>2020-03-09 13:58:28</td>\n",
+       "      <td>40.19</td>\n",
+       "      <td>25</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>4</td>\n",
+       "      <td>0.4718</td>\n",
+       "      <td>7059.05</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>0.463488</td>\n",
+       "      <td>92.0</td>\n",
+       "      <td>BR</td>\n",
+       "      <td>25</td>\n",
+       "      <td>cat_4c2544e</td>\n",
+       "      <td>0.651835</td>\n",
+       "      <td>2153.0</td>\n",
+       "      <td>249.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>2020-04-08 12:25:55</td>\n",
+       "      <td>5.77</td>\n",
+       "      <td>23</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>4</td>\n",
+       "      <td>0.7260</td>\n",
+       "      <td>10043.10</td>\n",
+       "      <td>24.0</td>\n",
+       "      <td>0.046845</td>\n",
+       "      <td>43.0</td>\n",
+       "      <td>BR</td>\n",
+       "      <td>43</td>\n",
+       "      <td>cat_1b59ee3</td>\n",
+       "      <td>0.692728</td>\n",
+       "      <td>4845.0</td>\n",
+       "      <td>141.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>N</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>2020-03-14 11:46:13</td>\n",
+       "      <td>40.89</td>\n",
+       "      <td>23</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>4</td>\n",
+       "      <td>0.7758</td>\n",
+       "      <td>16584.42</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>0.154616</td>\n",
+       "      <td>54.0</td>\n",
+       "      <td>BR</td>\n",
+       "      <td>0</td>\n",
+       "      <td>cat_9bacaa5</td>\n",
+       "      <td>0.201354</td>\n",
+       "      <td>2856.0</td>\n",
+       "      <td>18.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>N</td>\n",
+       "      <td>2020-03-23 14:17:13</td>\n",
+       "      <td>18.98</td>\n",
+       "      <td>71</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   a       b         c     d         e     f   g   h            j         k  \\\n",
+       "0  4  0.6812  50084.12  50.0  0.000000  20.0  AR   1  cat_d26ab52  0.365475   \n",
+       "1  4  0.6694  66005.49   0.0  0.000000   2.0  AR   1  cat_ea962fb  0.612728   \n",
+       "2  4  0.4718   7059.05   4.0  0.463488  92.0  BR  25  cat_4c2544e  0.651835   \n",
+       "3  4  0.7260  10043.10  24.0  0.046845  43.0  BR  43  cat_1b59ee3  0.692728   \n",
+       "4  4  0.7758  16584.42   2.0  0.154616  54.0  BR   0  cat_9bacaa5  0.201354   \n",
+       "\n",
+       "        l      m  n    o  p                fecha  monto  score  fraude  \n",
+       "0  2479.0  952.0  1  NaN  Y  2020-03-20 09:28:19  57.63    100       0  \n",
+       "1  2603.0  105.0  1    Y  Y  2020-03-09 13:58:28  40.19     25       0  \n",
+       "2  2153.0  249.0  1    Y  Y  2020-04-08 12:25:55   5.77     23       0  \n",
+       "3  4845.0  141.0  1    N  Y  2020-03-14 11:46:13  40.89     23       0  \n",
+       "4  2856.0   18.0  1    Y  N  2020-03-23 14:17:13  18.98     71       0  "
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data = pd.read_csv('../data/MercadoLibre Data Scientist Technical Challenge - Dataset.csv')\n",
+    "print(data.shape)\n",
+    "data.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "((135000, 18), (15000, 18))"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X_train, X_test, y_train, y_test = train_test_split(\n",
+    "    data.drop(['fraude'], axis=1), # predictive variables\n",
+    "    data['fraude'], # target\n",
+    "    test_size=0.1, # portion of dataset to allocate to test set\n",
+    "    random_state=0, # we are setting the seed here\n",
+    ")\n",
+    "\n",
+    "X_train.shape, X_test.shape"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## missing indicator"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## Vars with na\n",
+    "vars_with_na = [var for var in data.columns if data[var].isnull().sum() > 0]\n",
+    "indicator = AddMissingIndicator(variables=vars_with_na)\n",
+    "indicator.fit(X_train)\n",
+    "transform_data =indicator.transform(X_train)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Imputation on numerical vars"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# make list of numerical variables\n",
+    "num_vars = [var for var in data.columns if data[var].dtypes != 'O' and 'fraude' not in var]\n",
+    "num_vars_na = [var for var in num_vars if var in vars_with_na]\n",
+    "\n",
+    "imputer = MeanMedianImputer(imputation_method='median', variables=num_vars_na)\n",
+    "imputer.fit(transform_data)\n",
+    "transform_data =imputer.transform(transform_data)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Transformation of numerical vars"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "log_vars =['c','monto']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "logtranformer = LogTransformer(variables=log_vars)\n",
+    "logtranformer.fit(transform_data)\n",
+    "transform_data = logtranformer.transform(transform_data)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Discretización"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>a</th>\n",
+       "      <th>b</th>\n",
+       "      <th>c</th>\n",
+       "      <th>d</th>\n",
+       "      <th>e</th>\n",
+       "      <th>f</th>\n",
+       "      <th>g</th>\n",
+       "      <th>h</th>\n",
+       "      <th>j</th>\n",
+       "      <th>k</th>\n",
+       "      <th>...</th>\n",
+       "      <th>monto</th>\n",
+       "      <th>score</th>\n",
+       "      <th>b_na</th>\n",
+       "      <th>c_na</th>\n",
+       "      <th>d_na</th>\n",
+       "      <th>f_na</th>\n",
+       "      <th>g_na</th>\n",
+       "      <th>l_na</th>\n",
+       "      <th>m_na</th>\n",
+       "      <th>o_na</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>135569</th>\n",
+       "      <td>4</td>\n",
+       "      <td>0.5217</td>\n",
+       "      <td>9.791941</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>BR</td>\n",
+       "      <td>36</td>\n",
+       "      <td>cat_4744ece</td>\n",
+       "      <td>0.636610</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.214466</td>\n",
+       "      <td>93</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>78656</th>\n",
+       "      <td>2</td>\n",
+       "      <td>0.7554</td>\n",
+       "      <td>10.686472</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>AR</td>\n",
+       "      <td>8</td>\n",
+       "      <td>cat_3203c7c</td>\n",
+       "      <td>0.633266</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.364188</td>\n",
+       "      <td>6</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>87437</th>\n",
+       "      <td>4</td>\n",
+       "      <td>0.5437</td>\n",
+       "      <td>11.717906</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>AR</td>\n",
+       "      <td>46</td>\n",
+       "      <td>cat_5b785c6</td>\n",
+       "      <td>0.735749</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.106826</td>\n",
+       "      <td>55</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>131674</th>\n",
+       "      <td>4</td>\n",
+       "      <td>0.7418</td>\n",
+       "      <td>9.755215</td>\n",
+       "      <td>50.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>BR</td>\n",
+       "      <td>9</td>\n",
+       "      <td>cat_a8c10a4</td>\n",
+       "      <td>0.529367</td>\n",
+       "      <td>...</td>\n",
+       "      <td>2.867899</td>\n",
+       "      <td>7</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>45535</th>\n",
+       "      <td>4</td>\n",
+       "      <td>0.6463</td>\n",
+       "      <td>10.851127</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>AR</td>\n",
+       "      <td>22</td>\n",
+       "      <td>cat_edae169</td>\n",
+       "      <td>0.049212</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3.383712</td>\n",
+       "      <td>32</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 26 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "        a       b          c     d  e  f   g   h            j         k  ...  \\\n",
+       "135569  4  0.5217   9.791941   1.0  1  1  BR  36  cat_4744ece  0.636610  ...   \n",
+       "78656   2  0.7554  10.686472   1.0  0  1  AR   8  cat_3203c7c  0.633266  ...   \n",
+       "87437   4  0.5437  11.717906   1.0  1  1  AR  46  cat_5b785c6  0.735749  ...   \n",
+       "131674  4  0.7418   9.755215  50.0  1  1  BR   9  cat_a8c10a4  0.529367  ...   \n",
+       "45535   4  0.6463  10.851127   4.0  1  1  AR  22  cat_edae169  0.049212  ...   \n",
+       "\n",
+       "           monto  score  b_na c_na d_na f_na  g_na  l_na  m_na  o_na  \n",
+       "135569  3.214466     93     0    0    0    0     0     0     0     1  \n",
+       "78656   3.364188      6     1    1    0    0     0     0     0     1  \n",
+       "87437   3.106826     55     0    0    0    0     0     0     0     1  \n",
+       "131674  2.867899      7     0    0    0    0     0     0     0     1  \n",
+       "45535   3.383712     32     0    0    0    0     0     0     0     0  \n",
+       "\n",
+       "[5 rows x 26 columns]"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "skewed_vars = ['e', 'f']\n",
+    "discretizer = ArbitraryDiscretiser( binning_dict= dict(e =[-np.inf,0,np.inf], f=[-np.inf,0,np.inf]) )\n",
+    "discretizer.fit(transform_data)\n",
+    "transform_data = discretizer.transform(transform_data)\n",
+    "transform_data.head()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Transformación de variables categóricas"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# capture categorical variables in a list\n",
+    "cat_vars = [var for var in data.columns if data[var].dtypes == 'O' and 'fecha' not in var]\n",
+    "cat_vars_na = [var for var in cat_vars if var in vars_with_na]\n",
+    "categorical_imputer  = CategoricalImputer(variables=cat_vars_na, imputation_method='missing', fill_value='missing')\n",
+    "categorical_imputer.fit(transform_data)\n",
+    "transform_data = categorical_imputer.transform(transform_data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## Encode rare labels\n",
+    "rarelabel = RareLabelEncoder(variables=cat_vars, tol=0.001, n_categories=1)\n",
+    "rarelabel.fit(transform_data)\n",
+    "transform_data = rarelabel.transform(transform_data)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## ordinal encoders\n",
+    "ordinal_encoder = OrdinalEncoder(variables=cat_vars)\n",
+    "ordinal_encoder.fit(transform_data, y_train)\n",
+    "transform_data = ordinal_encoder.transform(transform_data)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Datetime Features"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dt_features = DatetimeFeatures(variables='fecha', features_to_extract='all')\n",
+    "dt_features.fit(transform_data)\n",
+    "transform_data = dt_features.transform(transform_data)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Scale data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "scaler = ScalerDf(method='minmax')\n",
+    "scaler.fit(transform_data)\n",
+    "transform_data = scaler.transform(transform_data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'o',\n",
+       "       'p', 'monto', 'score', 'b_na', 'c_na', 'd_na', 'f_na', 'g_na', 'l_na',\n",
+       "       'm_na', 'o_na', 'fecha_month', 'fecha_quarter', 'fecha_semester',\n",
+       "       'fecha_year', 'fecha_week', 'fecha_day_of_week', 'fecha_day_of_month',\n",
+       "       'fecha_day_of_year', 'fecha_weekend', 'fecha_month_start',\n",
+       "       'fecha_month_end', 'fecha_quarter_start', 'fecha_quarter_end',\n",
+       "       'fecha_year_start', 'fecha_year_end', 'fecha_leap_year',\n",
+       "       'fecha_days_in_month', 'fecha_hour', 'fecha_minute', 'fecha_second'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "transform_data.columns"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Pongamos todo junto"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pipeline_steps = [\n",
+    "    ('missing_indicator',AddMissingIndicator(variables=vars_with_na)),\n",
+    "    ('numerical_imputer', MeanMedianImputer(imputation_method='median', variables=num_vars_na)),\n",
+    "    ('categorical_imputer', CategoricalImputer(variables=cat_vars_na, imputation_method='missing', fill_value='missing')),\n",
+    "    ('numerical_transformation', LogTransformer(variables=log_vars)),\n",
+    "    ('binarizer', ArbitraryDiscretiser( binning_dict= dict(e =[-np.inf,0,np.inf], f=[-np.inf,0,np.inf]))),\n",
+    "    ('rare_label_encoder', RareLabelEncoder(variables=cat_vars, tol=0.001, n_categories=1)),\n",
+    "    ('ordinal_encoder', OrdinalEncoder(variables=cat_vars)),\n",
+    "    ('datetime_features', DatetimeFeatures(variables='fecha', features_to_extract='all')),\n",
+    "    ('scaler', ScalerDf(method='minmax'))\n",
+    "    \n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fraud_pipeline = Pipeline(pipeline_steps)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<style>#sk-container-id-1 {color: black;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[(&#x27;missing_indicator&#x27;,\n",
+       "                 AddMissingIndicator(variables=[&#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;f&#x27;, &#x27;g&#x27;, &#x27;l&#x27;,\n",
+       "                                                &#x27;m&#x27;, &#x27;o&#x27;])),\n",
+       "                (&#x27;numerical_imputer&#x27;,\n",
+       "                 MeanMedianImputer(variables=[&#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;f&#x27;, &#x27;l&#x27;, &#x27;m&#x27;])),\n",
+       "                (&#x27;categorical_imputer&#x27;,\n",
+       "                 CategoricalImputer(fill_value=&#x27;missing&#x27;,\n",
+       "                                    variables=[&#x27;g&#x27;, &#x27;o&#x27;])),\n",
+       "                (&#x27;numerical_transformation&#x27;,\n",
+       "                 LogTransformer(variables=[&#x27;c&#x27;, &#x27;monto&#x27;])),\n",
+       "                (&#x27;binarizer&#x27;,\n",
+       "                 ArbitraryDiscretiser(binning_dict={&#x27;e&#x27;: [-inf, 0, inf],\n",
+       "                                                    &#x27;f&#x27;: [-inf, 0, inf]})),\n",
+       "                (&#x27;rare_label_encoder&#x27;,\n",
+       "                 RareLabelEncoder(n_categories=1, tol=0.001,\n",
+       "                                  variables=[&#x27;g&#x27;, &#x27;j&#x27;, &#x27;o&#x27;, &#x27;p&#x27;])),\n",
+       "                (&#x27;ordinal_encoder&#x27;,\n",
+       "                 OrdinalEncoder(variables=[&#x27;g&#x27;, &#x27;j&#x27;, &#x27;o&#x27;, &#x27;p&#x27;])),\n",
+       "                (&#x27;datetime_features&#x27;,\n",
+       "                 DatetimeFeatures(features_to_extract=&#x27;all&#x27;,\n",
+       "                                  variables=&#x27;fecha&#x27;)),\n",
+       "                (&#x27;scaler&#x27;, ScalerDf(method=&#x27;minmax&#x27;))])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" ><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[(&#x27;missing_indicator&#x27;,\n",
+       "                 AddMissingIndicator(variables=[&#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;f&#x27;, &#x27;g&#x27;, &#x27;l&#x27;,\n",
+       "                                                &#x27;m&#x27;, &#x27;o&#x27;])),\n",
+       "                (&#x27;numerical_imputer&#x27;,\n",
+       "                 MeanMedianImputer(variables=[&#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;f&#x27;, &#x27;l&#x27;, &#x27;m&#x27;])),\n",
+       "                (&#x27;categorical_imputer&#x27;,\n",
+       "                 CategoricalImputer(fill_value=&#x27;missing&#x27;,\n",
+       "                                    variables=[&#x27;g&#x27;, &#x27;o&#x27;])),\n",
+       "                (&#x27;numerical_transformation&#x27;,\n",
+       "                 LogTransformer(variables=[&#x27;c&#x27;, &#x27;monto&#x27;])),\n",
+       "                (&#x27;binarizer&#x27;,\n",
+       "                 ArbitraryDiscretiser(binning_dict={&#x27;e&#x27;: [-inf, 0, inf],\n",
+       "                                                    &#x27;f&#x27;: [-inf, 0, inf]})),\n",
+       "                (&#x27;rare_label_encoder&#x27;,\n",
+       "                 RareLabelEncoder(n_categories=1, tol=0.001,\n",
+       "                                  variables=[&#x27;g&#x27;, &#x27;j&#x27;, &#x27;o&#x27;, &#x27;p&#x27;])),\n",
+       "                (&#x27;ordinal_encoder&#x27;,\n",
+       "                 OrdinalEncoder(variables=[&#x27;g&#x27;, &#x27;j&#x27;, &#x27;o&#x27;, &#x27;p&#x27;])),\n",
+       "                (&#x27;datetime_features&#x27;,\n",
+       "                 DatetimeFeatures(features_to_extract=&#x27;all&#x27;,\n",
+       "                                  variables=&#x27;fecha&#x27;)),\n",
+       "                (&#x27;scaler&#x27;, ScalerDf(method=&#x27;minmax&#x27;))])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" ><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">AddMissingIndicator</label><div class=\"sk-toggleable__content\"><pre>AddMissingIndicator(variables=[&#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;f&#x27;, &#x27;g&#x27;, &#x27;l&#x27;, &#x27;m&#x27;, &#x27;o&#x27;])</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" ><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">MeanMedianImputer</label><div class=\"sk-toggleable__content\"><pre>MeanMedianImputer(variables=[&#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;f&#x27;, &#x27;l&#x27;, &#x27;m&#x27;])</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" ><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">CategoricalImputer</label><div class=\"sk-toggleable__content\"><pre>CategoricalImputer(fill_value=&#x27;missing&#x27;, variables=[&#x27;g&#x27;, &#x27;o&#x27;])</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-5\" type=\"checkbox\" ><label for=\"sk-estimator-id-5\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LogTransformer</label><div class=\"sk-toggleable__content\"><pre>LogTransformer(variables=[&#x27;c&#x27;, &#x27;monto&#x27;])</pre></div></div></div><div class=\"sk-item\"><div 
class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-6\" type=\"checkbox\" ><label for=\"sk-estimator-id-6\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">ArbitraryDiscretiser</label><div class=\"sk-toggleable__content\"><pre>ArbitraryDiscretiser(binning_dict={&#x27;e&#x27;: [-inf, 0, inf], &#x27;f&#x27;: [-inf, 0, inf]})</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-7\" type=\"checkbox\" ><label for=\"sk-estimator-id-7\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RareLabelEncoder</label><div class=\"sk-toggleable__content\"><pre>RareLabelEncoder(n_categories=1, tol=0.001, variables=[&#x27;g&#x27;, &#x27;j&#x27;, &#x27;o&#x27;, &#x27;p&#x27;])</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-8\" type=\"checkbox\" ><label for=\"sk-estimator-id-8\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">OrdinalEncoder</label><div class=\"sk-toggleable__content\"><pre>OrdinalEncoder(variables=[&#x27;g&#x27;, &#x27;j&#x27;, &#x27;o&#x27;, &#x27;p&#x27;])</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-9\" type=\"checkbox\" ><label for=\"sk-estimator-id-9\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">DatetimeFeatures</label><div class=\"sk-toggleable__content\"><pre>DatetimeFeatures(features_to_extract=&#x27;all&#x27;, variables=&#x27;fecha&#x27;)</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-10\" type=\"checkbox\" ><label for=\"sk-estimator-id-10\" class=\"sk-toggleable__label 
sk-toggleable__label-arrow\">ScalerDf</label><div class=\"sk-toggleable__content\"><pre>ScalerDf(method=&#x27;minmax&#x27;)</pre></div></div></div></div></div></div></div>"
+      ],
+      "text/plain": [
+       "Pipeline(steps=[('missing_indicator',\n",
+       "                 AddMissingIndicator(variables=['b', 'c', 'd', 'f', 'g', 'l',\n",
+       "                                                'm', 'o'])),\n",
+       "                ('numerical_imputer',\n",
+       "                 MeanMedianImputer(variables=['b', 'c', 'd', 'f', 'l', 'm'])),\n",
+       "                ('categorical_imputer',\n",
+       "                 CategoricalImputer(fill_value='missing',\n",
+       "                                    variables=['g', 'o'])),\n",
+       "                ('numerical_transformation',\n",
+       "                 LogTransformer(variables=['c', 'monto'])),\n",
+       "                ('binarizer',\n",
+       "                 ArbitraryDiscretiser(binning_dict={'e': [-inf, 0, inf],\n",
+       "                                                    'f': [-inf, 0, inf]})),\n",
+       "                ('rare_label_encoder',\n",
+       "                 RareLabelEncoder(n_categories=1, tol=0.001,\n",
+       "                                  variables=['g', 'j', 'o', 'p'])),\n",
+       "                ('ordinal_encoder',\n",
+       "                 OrdinalEncoder(variables=['g', 'j', 'o', 'p'])),\n",
+       "                ('datetime_features',\n",
+       "                 DatetimeFeatures(features_to_extract='all',\n",
+       "                                  variables='fecha')),\n",
+       "                ('scaler', ScalerDf(method='minmax'))])"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "fraud_pipeline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<style>#sk-container-id-2 {color: black;}#sk-container-id-2 pre{padding: 0;}#sk-container-id-2 div.sk-toggleable {background-color: white;}#sk-container-id-2 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-2 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-2 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-2 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-2 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-2 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-2 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-2 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-2 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-2 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-2 div.sk-item {position: relative;z-index: 1;}#sk-container-id-2 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-2 div.sk-item::before, #sk-container-id-2 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-2 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-2 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-2 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-2 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-2 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-2 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-2 div.sk-label-container {text-align: center;}#sk-container-id-2 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-2 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[(&#x27;missing_indicator&#x27;,\n",
+       "                 AddMissingIndicator(variables=[&#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;f&#x27;, &#x27;g&#x27;, &#x27;l&#x27;,\n",
+       "                                                &#x27;m&#x27;, &#x27;o&#x27;])),\n",
+       "                (&#x27;numerical_imputer&#x27;,\n",
+       "                 MeanMedianImputer(variables=[&#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;f&#x27;, &#x27;l&#x27;, &#x27;m&#x27;])),\n",
+       "                (&#x27;categorical_imputer&#x27;,\n",
+       "                 CategoricalImputer(fill_value=&#x27;missing&#x27;,\n",
+       "                                    variables=[&#x27;g&#x27;, &#x27;o&#x27;])),\n",
+       "                (&#x27;numerical_transformation&#x27;,\n",
+       "                 LogTransformer(variables=[&#x27;c&#x27;, &#x27;monto&#x27;])),\n",
+       "                (&#x27;binarizer&#x27;,\n",
+       "                 ArbitraryDiscretiser(binning_dict={&#x27;e&#x27;: [-inf, 0, inf],\n",
+       "                                                    &#x27;f&#x27;: [-inf, 0, inf]})),\n",
+       "                (&#x27;rare_label_encoder&#x27;,\n",
+       "                 RareLabelEncoder(n_categories=1, tol=0.001,\n",
+       "                                  variables=[&#x27;g&#x27;, &#x27;j&#x27;, &#x27;o&#x27;, &#x27;p&#x27;])),\n",
+       "                (&#x27;ordinal_encoder&#x27;,\n",
+       "                 OrdinalEncoder(variables=[&#x27;g&#x27;, &#x27;j&#x27;, &#x27;o&#x27;, &#x27;p&#x27;])),\n",
+       "                (&#x27;datetime_features&#x27;,\n",
+       "                 DatetimeFeatures(features_to_extract=&#x27;all&#x27;,\n",
+       "                                  variables=&#x27;fecha&#x27;)),\n",
+       "                (&#x27;scaler&#x27;, ScalerDf(method=&#x27;minmax&#x27;))])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-11\" type=\"checkbox\" ><label for=\"sk-estimator-id-11\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[(&#x27;missing_indicator&#x27;,\n",
+       "                 AddMissingIndicator(variables=[&#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;f&#x27;, &#x27;g&#x27;, &#x27;l&#x27;,\n",
+       "                                                &#x27;m&#x27;, &#x27;o&#x27;])),\n",
+       "                (&#x27;numerical_imputer&#x27;,\n",
+       "                 MeanMedianImputer(variables=[&#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;f&#x27;, &#x27;l&#x27;, &#x27;m&#x27;])),\n",
+       "                (&#x27;categorical_imputer&#x27;,\n",
+       "                 CategoricalImputer(fill_value=&#x27;missing&#x27;,\n",
+       "                                    variables=[&#x27;g&#x27;, &#x27;o&#x27;])),\n",
+       "                (&#x27;numerical_transformation&#x27;,\n",
+       "                 LogTransformer(variables=[&#x27;c&#x27;, &#x27;monto&#x27;])),\n",
+       "                (&#x27;binarizer&#x27;,\n",
+       "                 ArbitraryDiscretiser(binning_dict={&#x27;e&#x27;: [-inf, 0, inf],\n",
+       "                                                    &#x27;f&#x27;: [-inf, 0, inf]})),\n",
+       "                (&#x27;rare_label_encoder&#x27;,\n",
+       "                 RareLabelEncoder(n_categories=1, tol=0.001,\n",
+       "                                  variables=[&#x27;g&#x27;, &#x27;j&#x27;, &#x27;o&#x27;, &#x27;p&#x27;])),\n",
+       "                (&#x27;ordinal_encoder&#x27;,\n",
+       "                 OrdinalEncoder(variables=[&#x27;g&#x27;, &#x27;j&#x27;, &#x27;o&#x27;, &#x27;p&#x27;])),\n",
+       "                (&#x27;datetime_features&#x27;,\n",
+       "                 DatetimeFeatures(features_to_extract=&#x27;all&#x27;,\n",
+       "                                  variables=&#x27;fecha&#x27;)),\n",
+       "                (&#x27;scaler&#x27;, ScalerDf(method=&#x27;minmax&#x27;))])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-12\" type=\"checkbox\" ><label for=\"sk-estimator-id-12\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">AddMissingIndicator</label><div class=\"sk-toggleable__content\"><pre>AddMissingIndicator(variables=[&#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;f&#x27;, &#x27;g&#x27;, &#x27;l&#x27;, &#x27;m&#x27;, &#x27;o&#x27;])</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-13\" type=\"checkbox\" ><label for=\"sk-estimator-id-13\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">MeanMedianImputer</label><div class=\"sk-toggleable__content\"><pre>MeanMedianImputer(variables=[&#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;f&#x27;, &#x27;l&#x27;, &#x27;m&#x27;])</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-14\" type=\"checkbox\" ><label for=\"sk-estimator-id-14\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">CategoricalImputer</label><div class=\"sk-toggleable__content\"><pre>CategoricalImputer(fill_value=&#x27;missing&#x27;, variables=[&#x27;g&#x27;, &#x27;o&#x27;])</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-15\" type=\"checkbox\" ><label for=\"sk-estimator-id-15\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LogTransformer</label><div class=\"sk-toggleable__content\"><pre>LogTransformer(variables=[&#x27;c&#x27;, &#x27;monto&#x27;])</pre></div></div></div><div class=\"sk-item\"><div 
class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-16\" type=\"checkbox\" ><label for=\"sk-estimator-id-16\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">ArbitraryDiscretiser</label><div class=\"sk-toggleable__content\"><pre>ArbitraryDiscretiser(binning_dict={&#x27;e&#x27;: [-inf, 0, inf], &#x27;f&#x27;: [-inf, 0, inf]})</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-17\" type=\"checkbox\" ><label for=\"sk-estimator-id-17\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RareLabelEncoder</label><div class=\"sk-toggleable__content\"><pre>RareLabelEncoder(n_categories=1, tol=0.001, variables=[&#x27;g&#x27;, &#x27;j&#x27;, &#x27;o&#x27;, &#x27;p&#x27;])</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-18\" type=\"checkbox\" ><label for=\"sk-estimator-id-18\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">OrdinalEncoder</label><div class=\"sk-toggleable__content\"><pre>OrdinalEncoder(variables=[&#x27;g&#x27;, &#x27;j&#x27;, &#x27;o&#x27;, &#x27;p&#x27;])</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-19\" type=\"checkbox\" ><label for=\"sk-estimator-id-19\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">DatetimeFeatures</label><div class=\"sk-toggleable__content\"><pre>DatetimeFeatures(features_to_extract=&#x27;all&#x27;, variables=&#x27;fecha&#x27;)</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-20\" type=\"checkbox\" ><label for=\"sk-estimator-id-20\" class=\"sk-toggleable__label 
sk-toggleable__label-arrow\">ScalerDf</label><div class=\"sk-toggleable__content\"><pre>ScalerDf(method=&#x27;minmax&#x27;)</pre></div></div></div></div></div></div></div>"
+      ],
+      "text/plain": [
+       "Pipeline(steps=[('missing_indicator',\n",
+       "                 AddMissingIndicator(variables=['b', 'c', 'd', 'f', 'g', 'l',\n",
+       "                                                'm', 'o'])),\n",
+       "                ('numerical_imputer',\n",
+       "                 MeanMedianImputer(variables=['b', 'c', 'd', 'f', 'l', 'm'])),\n",
+       "                ('categorical_imputer',\n",
+       "                 CategoricalImputer(fill_value='missing',\n",
+       "                                    variables=['g', 'o'])),\n",
+       "                ('numerical_transformation',\n",
+       "                 LogTransformer(variables=['c', 'monto'])),\n",
+       "                ('binarizer',\n",
+       "                 ArbitraryDiscretiser(binning_dict={'e': [-inf, 0, inf],\n",
+       "                                                    'f': [-inf, 0, inf]})),\n",
+       "                ('rare_label_encoder',\n",
+       "                 RareLabelEncoder(n_categories=1, tol=0.001,\n",
+       "                                  variables=['g', 'j', 'o', 'p'])),\n",
+       "                ('ordinal_encoder',\n",
+       "                 OrdinalEncoder(variables=['g', 'j', 'o', 'p'])),\n",
+       "                ('datetime_features',\n",
+       "                 DatetimeFeatures(features_to_extract='all',\n",
+       "                                  variables='fecha')),\n",
+       "                ('scaler', ScalerDf(method='minmax'))])"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "fraud_pipeline.fit(X_train, y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>a</th>\n",
+       "      <th>b</th>\n",
+       "      <th>c</th>\n",
+       "      <th>d</th>\n",
+       "      <th>e</th>\n",
+       "      <th>f</th>\n",
+       "      <th>g</th>\n",
+       "      <th>h</th>\n",
+       "      <th>j</th>\n",
+       "      <th>k</th>\n",
+       "      <th>...</th>\n",
+       "      <th>fecha_month_end</th>\n",
+       "      <th>fecha_quarter_start</th>\n",
+       "      <th>fecha_quarter_end</th>\n",
+       "      <th>fecha_year_start</th>\n",
+       "      <th>fecha_year_end</th>\n",
+       "      <th>fecha_leap_year</th>\n",
+       "      <th>fecha_days_in_month</th>\n",
+       "      <th>fecha_hour</th>\n",
+       "      <th>fecha_minute</th>\n",
+       "      <th>fecha_second</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>135569</th>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.5217</td>\n",
+       "      <td>0.635969</td>\n",
+       "      <td>0.02</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.714286</td>\n",
+       "      <td>0.620690</td>\n",
+       "      <td>0.458599</td>\n",
+       "      <td>0.636612</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.391304</td>\n",
+       "      <td>0.525424</td>\n",
+       "      <td>0.881356</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>78656</th>\n",
+       "      <td>0.333333</td>\n",
+       "      <td>0.7554</td>\n",
+       "      <td>0.684908</td>\n",
+       "      <td>0.02</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.428571</td>\n",
+       "      <td>0.137931</td>\n",
+       "      <td>0.133758</td>\n",
+       "      <td>0.633268</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.347826</td>\n",
+       "      <td>0.254237</td>\n",
+       "      <td>0.288136</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>87437</th>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.5437</td>\n",
+       "      <td>0.741337</td>\n",
+       "      <td>0.02</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.428571</td>\n",
+       "      <td>0.793103</td>\n",
+       "      <td>0.458599</td>\n",
+       "      <td>0.735751</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.391304</td>\n",
+       "      <td>0.050847</td>\n",
+       "      <td>0.338983</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>131674</th>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.7418</td>\n",
+       "      <td>0.633959</td>\n",
+       "      <td>1.00</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.714286</td>\n",
+       "      <td>0.155172</td>\n",
+       "      <td>0.458599</td>\n",
+       "      <td>0.529368</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.782609</td>\n",
+       "      <td>0.915254</td>\n",
+       "      <td>0.101695</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>45535</th>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.6463</td>\n",
+       "      <td>0.693916</td>\n",
+       "      <td>0.08</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.428571</td>\n",
+       "      <td>0.379310</td>\n",
+       "      <td>0.458599</td>\n",
+       "      <td>0.049208</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.913043</td>\n",
+       "      <td>0.406780</td>\n",
+       "      <td>0.508475</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>41993</th>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.8063</td>\n",
+       "      <td>0.831573</td>\n",
+       "      <td>0.06</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.714286</td>\n",
+       "      <td>0.155172</td>\n",
+       "      <td>0.312102</td>\n",
+       "      <td>0.164571</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.826087</td>\n",
+       "      <td>0.067797</td>\n",
+       "      <td>0.762712</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>97639</th>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.5046</td>\n",
+       "      <td>0.618473</td>\n",
+       "      <td>0.04</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.428571</td>\n",
+       "      <td>0.155172</td>\n",
+       "      <td>0.458599</td>\n",
+       "      <td>0.288001</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.826087</td>\n",
+       "      <td>0.169492</td>\n",
+       "      <td>0.186441</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>95939</th>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.7233</td>\n",
+       "      <td>0.686591</td>\n",
+       "      <td>0.02</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.714286</td>\n",
+       "      <td>0.034483</td>\n",
+       "      <td>0.866242</td>\n",
+       "      <td>0.585850</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.869565</td>\n",
+       "      <td>0.372881</td>\n",
+       "      <td>0.847458</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>117952</th>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.7824</td>\n",
+       "      <td>0.710351</td>\n",
+       "      <td>0.96</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.714286</td>\n",
+       "      <td>0.086207</td>\n",
+       "      <td>0.458599</td>\n",
+       "      <td>0.007728</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.406780</td>\n",
+       "      <td>0.779661</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>43567</th>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.7225</td>\n",
+       "      <td>0.468508</td>\n",
+       "      <td>1.00</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.714286</td>\n",
+       "      <td>0.051724</td>\n",
+       "      <td>0.458599</td>\n",
+       "      <td>0.617746</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.913043</td>\n",
+       "      <td>0.288136</td>\n",
+       "      <td>0.305085</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>135000 rows × 45 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "               a       b         c     d    e    f         g         h  \\\n",
+       "135569  1.000000  0.5217  0.635969  0.02  1.0  1.0  0.714286  0.620690   \n",
+       "78656   0.333333  0.7554  0.684908  0.02  0.0  1.0  0.428571  0.137931   \n",
+       "87437   1.000000  0.5437  0.741337  0.02  1.0  1.0  0.428571  0.793103   \n",
+       "131674  1.000000  0.7418  0.633959  1.00  1.0  1.0  0.714286  0.155172   \n",
+       "45535   1.000000  0.6463  0.693916  0.08  1.0  1.0  0.428571  0.379310   \n",
+       "...          ...     ...       ...   ...  ...  ...       ...       ...   \n",
+       "41993   1.000000  0.8063  0.831573  0.06  1.0  0.0  0.714286  0.155172   \n",
+       "97639   1.000000  0.5046  0.618473  0.04  0.0  1.0  0.428571  0.155172   \n",
+       "95939   1.000000  0.7233  0.686591  0.02  0.0  0.0  0.714286  0.034483   \n",
+       "117952  1.000000  0.7824  0.710351  0.96  1.0  1.0  0.714286  0.086207   \n",
+       "43567   1.000000  0.7225  0.468508  1.00  0.0  1.0  0.714286  0.051724   \n",
+       "\n",
+       "               j         k  ...  fecha_month_end  fecha_quarter_start  \\\n",
+       "135569  0.458599  0.636612  ...              0.0                  0.0   \n",
+       "78656   0.133758  0.633268  ...              0.0                  0.0   \n",
+       "87437   0.458599  0.735751  ...              0.0                  1.0   \n",
+       "131674  0.458599  0.529368  ...              0.0                  0.0   \n",
+       "45535   0.458599  0.049208  ...              0.0                  0.0   \n",
+       "...          ...       ...  ...              ...                  ...   \n",
+       "41993   0.312102  0.164571  ...              0.0                  0.0   \n",
+       "97639   0.458599  0.288001  ...              0.0                  0.0   \n",
+       "95939   0.866242  0.585850  ...              0.0                  0.0   \n",
+       "117952  0.458599  0.007728  ...              0.0                  0.0   \n",
+       "43567   0.458599  0.617746  ...              0.0                  0.0   \n",
+       "\n",
+       "        fecha_quarter_end  fecha_year_start  fecha_year_end  fecha_leap_year  \\\n",
+       "135569                0.0               0.0             0.0              0.0   \n",
+       "78656                 0.0               0.0             0.0              0.0   \n",
+       "87437                 0.0               0.0             0.0              0.0   \n",
+       "131674                0.0               0.0             0.0              0.0   \n",
+       "45535                 0.0               0.0             0.0              0.0   \n",
+       "...                   ...               ...             ...              ...   \n",
+       "41993                 0.0               0.0             0.0              0.0   \n",
+       "97639                 0.0               0.0             0.0              0.0   \n",
+       "95939                 0.0               0.0             0.0              0.0   \n",
+       "117952                0.0               0.0             0.0              0.0   \n",
+       "43567                 0.0               0.0             0.0              0.0   \n",
+       "\n",
+       "        fecha_days_in_month  fecha_hour  fecha_minute  fecha_second  \n",
+       "135569                  1.0    0.391304      0.525424      0.881356  \n",
+       "78656                   1.0    0.347826      0.254237      0.288136  \n",
+       "87437                   0.0    0.391304      0.050847      0.338983  \n",
+       "131674                  0.0    0.782609      0.915254      0.101695  \n",
+       "45535                   0.0    0.913043      0.406780      0.508475  \n",
+       "...                     ...         ...           ...           ...  \n",
+       "41993                   0.0    0.826087      0.067797      0.762712  \n",
+       "97639                   0.0    0.826087      0.169492      0.186441  \n",
+       "95939                   1.0    0.869565      0.372881      0.847458  \n",
+       "117952                  0.0    0.000000      0.406780      0.779661  \n",
+       "43567                   1.0    0.913043      0.288136      0.305085  \n",
+       "\n",
+       "[135000 rows x 45 columns]"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "fraud_pipeline.transform(X_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['../models/feature_engineering_pipeline.joblib']"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "joblib.dump(fraud_pipeline, '../models/feature_engineering_pipeline.joblib')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "fraud-detection",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  },
+  "orig_nbformat": 4,
+  "vscode": {
+   "interpreter": {
+    "hash": "45e631c81adbf0cb55b2526738ae1a14c53cfa3f28a6ae1bee5619daf3ab935d"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

notebooks/03-feature_selection.ipynb ADDED Viewed

	@@ -0,0 +1,837 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import joblib\n",
+    "import pandas as pd\n",
+    "from feature_engine.selection import ProbeFeatureSelection\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "from sklearn.linear_model import LogisticRegression"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(150000, 19)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>a</th>\n",
+       "      <th>b</th>\n",
+       "      <th>c</th>\n",
+       "      <th>d</th>\n",
+       "      <th>e</th>\n",
+       "      <th>f</th>\n",
+       "      <th>g</th>\n",
+       "      <th>h</th>\n",
+       "      <th>j</th>\n",
+       "      <th>k</th>\n",
+       "      <th>l</th>\n",
+       "      <th>m</th>\n",
+       "      <th>n</th>\n",
+       "      <th>o</th>\n",
+       "      <th>p</th>\n",
+       "      <th>fecha</th>\n",
+       "      <th>monto</th>\n",
+       "      <th>score</th>\n",
+       "      <th>fraude</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>4</td>\n",
+       "      <td>0.6812</td>\n",
+       "      <td>50084.12</td>\n",
+       "      <td>50.0</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>20.0</td>\n",
+       "      <td>AR</td>\n",
+       "      <td>1</td>\n",
+       "      <td>cat_d26ab52</td>\n",
+       "      <td>0.365475</td>\n",
+       "      <td>2479.0</td>\n",
+       "      <td>952.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>2020-03-20 09:28:19</td>\n",
+       "      <td>57.63</td>\n",
+       "      <td>100</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>4</td>\n",
+       "      <td>0.6694</td>\n",
+       "      <td>66005.49</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>AR</td>\n",
+       "      <td>1</td>\n",
+       "      <td>cat_ea962fb</td>\n",
+       "      <td>0.612728</td>\n",
+       "      <td>2603.0</td>\n",
+       "      <td>105.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>2020-03-09 13:58:28</td>\n",
+       "      <td>40.19</td>\n",
+       "      <td>25</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>4</td>\n",
+       "      <td>0.4718</td>\n",
+       "      <td>7059.05</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>0.463488</td>\n",
+       "      <td>92.0</td>\n",
+       "      <td>BR</td>\n",
+       "      <td>25</td>\n",
+       "      <td>cat_4c2544e</td>\n",
+       "      <td>0.651835</td>\n",
+       "      <td>2153.0</td>\n",
+       "      <td>249.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>2020-04-08 12:25:55</td>\n",
+       "      <td>5.77</td>\n",
+       "      <td>23</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>4</td>\n",
+       "      <td>0.7260</td>\n",
+       "      <td>10043.10</td>\n",
+       "      <td>24.0</td>\n",
+       "      <td>0.046845</td>\n",
+       "      <td>43.0</td>\n",
+       "      <td>BR</td>\n",
+       "      <td>43</td>\n",
+       "      <td>cat_1b59ee3</td>\n",
+       "      <td>0.692728</td>\n",
+       "      <td>4845.0</td>\n",
+       "      <td>141.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>N</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>2020-03-14 11:46:13</td>\n",
+       "      <td>40.89</td>\n",
+       "      <td>23</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>4</td>\n",
+       "      <td>0.7758</td>\n",
+       "      <td>16584.42</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>0.154616</td>\n",
+       "      <td>54.0</td>\n",
+       "      <td>BR</td>\n",
+       "      <td>0</td>\n",
+       "      <td>cat_9bacaa5</td>\n",
+       "      <td>0.201354</td>\n",
+       "      <td>2856.0</td>\n",
+       "      <td>18.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Y</td>\n",
+       "      <td>N</td>\n",
+       "      <td>2020-03-23 14:17:13</td>\n",
+       "      <td>18.98</td>\n",
+       "      <td>71</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   a       b         c     d         e     f   g   h            j         k  \\\n",
+       "0  4  0.6812  50084.12  50.0  0.000000  20.0  AR   1  cat_d26ab52  0.365475   \n",
+       "1  4  0.6694  66005.49   0.0  0.000000   2.0  AR   1  cat_ea962fb  0.612728   \n",
+       "2  4  0.4718   7059.05   4.0  0.463488  92.0  BR  25  cat_4c2544e  0.651835   \n",
+       "3  4  0.7260  10043.10  24.0  0.046845  43.0  BR  43  cat_1b59ee3  0.692728   \n",
+       "4  4  0.7758  16584.42   2.0  0.154616  54.0  BR   0  cat_9bacaa5  0.201354   \n",
+       "\n",
+       "        l      m  n    o  p                fecha  monto  score  fraude  \n",
+       "0  2479.0  952.0  1  NaN  Y  2020-03-20 09:28:19  57.63    100       0  \n",
+       "1  2603.0  105.0  1    Y  Y  2020-03-09 13:58:28  40.19     25       0  \n",
+       "2  2153.0  249.0  1    Y  Y  2020-04-08 12:25:55   5.77     23       0  \n",
+       "3  4845.0  141.0  1    N  Y  2020-03-14 11:46:13  40.89     23       0  \n",
+       "4  2856.0   18.0  1    Y  N  2020-03-23 14:17:13  18.98     71       0  "
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data = pd.read_csv('../data/MercadoLibre Data Scientist Technical Challenge - Dataset.csv')\n",
+    "print(data.shape)\n",
+    "data.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "((135000, 18), (15000, 18))"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X_train, X_test, y_train, y_test = train_test_split(\n",
+    "    data.drop(['fraude'], axis=1), # predictive variables\n",
+    "    data['fraude'], # target\n",
+    "    test_size=0.1, # portion of dataset to allocate to test set\n",
+    "    random_state=0, # we are setting the seed here\n",
+    ")\n",
+    "\n",
+    "X_train.shape, X_test.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fraud_pipeline = joblib.load('../models/feature_engineering_pipeline.joblib')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<style>#sk-container-id-1 {color: black;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[(&#x27;missing_indicator&#x27;,\n",
+       "                 AddMissingIndicator(variables=[&#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;f&#x27;, &#x27;g&#x27;, &#x27;l&#x27;,\n",
+       "                                                &#x27;m&#x27;, &#x27;o&#x27;])),\n",
+       "                (&#x27;numerical_imputer&#x27;,\n",
+       "                 MeanMedianImputer(variables=[&#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;f&#x27;, &#x27;l&#x27;, &#x27;m&#x27;])),\n",
+       "                (&#x27;categorical_imputer&#x27;,\n",
+       "                 CategoricalImputer(fill_value=&#x27;missing&#x27;,\n",
+       "                                    variables=[&#x27;g&#x27;, &#x27;o&#x27;])),\n",
+       "                (&#x27;numerical_transformation&#x27;,\n",
+       "                 LogTransformer(variables=[&#x27;c&#x27;, &#x27;monto&#x27;])),\n",
+       "                (&#x27;binarizer&#x27;,\n",
+       "                 ArbitraryDiscretiser(binning_dict={&#x27;e&#x27;: [-inf, 0, inf],\n",
+       "                                                    &#x27;f&#x27;: [-inf, 0, inf]})),\n",
+       "                (&#x27;rare_label_encoder&#x27;,\n",
+       "                 RareLabelEncoder(n_categories=1, tol=0.001,\n",
+       "                                  variables=[&#x27;g&#x27;, &#x27;j&#x27;, &#x27;o&#x27;, &#x27;p&#x27;])),\n",
+       "                (&#x27;ordinal_encoder&#x27;,\n",
+       "                 OrdinalEncoder(variables=[&#x27;g&#x27;, &#x27;j&#x27;, &#x27;o&#x27;, &#x27;p&#x27;])),\n",
+       "                (&#x27;datetime_features&#x27;,\n",
+       "                 DatetimeFeatures(features_to_extract=&#x27;all&#x27;,\n",
+       "                                  variables=&#x27;fecha&#x27;)),\n",
+       "                (&#x27;scaler&#x27;, ScalerDf(method=&#x27;minmax&#x27;))])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" ><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[(&#x27;missing_indicator&#x27;,\n",
+       "                 AddMissingIndicator(variables=[&#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;f&#x27;, &#x27;g&#x27;, &#x27;l&#x27;,\n",
+       "                                                &#x27;m&#x27;, &#x27;o&#x27;])),\n",
+       "                (&#x27;numerical_imputer&#x27;,\n",
+       "                 MeanMedianImputer(variables=[&#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;f&#x27;, &#x27;l&#x27;, &#x27;m&#x27;])),\n",
+       "                (&#x27;categorical_imputer&#x27;,\n",
+       "                 CategoricalImputer(fill_value=&#x27;missing&#x27;,\n",
+       "                                    variables=[&#x27;g&#x27;, &#x27;o&#x27;])),\n",
+       "                (&#x27;numerical_transformation&#x27;,\n",
+       "                 LogTransformer(variables=[&#x27;c&#x27;, &#x27;monto&#x27;])),\n",
+       "                (&#x27;binarizer&#x27;,\n",
+       "                 ArbitraryDiscretiser(binning_dict={&#x27;e&#x27;: [-inf, 0, inf],\n",
+       "                                                    &#x27;f&#x27;: [-inf, 0, inf]})),\n",
+       "                (&#x27;rare_label_encoder&#x27;,\n",
+       "                 RareLabelEncoder(n_categories=1, tol=0.001,\n",
+       "                                  variables=[&#x27;g&#x27;, &#x27;j&#x27;, &#x27;o&#x27;, &#x27;p&#x27;])),\n",
+       "                (&#x27;ordinal_encoder&#x27;,\n",
+       "                 OrdinalEncoder(variables=[&#x27;g&#x27;, &#x27;j&#x27;, &#x27;o&#x27;, &#x27;p&#x27;])),\n",
+       "                (&#x27;datetime_features&#x27;,\n",
+       "                 DatetimeFeatures(features_to_extract=&#x27;all&#x27;,\n",
+       "                                  variables=&#x27;fecha&#x27;)),\n",
+       "                (&#x27;scaler&#x27;, ScalerDf(method=&#x27;minmax&#x27;))])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" ><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">AddMissingIndicator</label><div class=\"sk-toggleable__content\"><pre>AddMissingIndicator(variables=[&#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;f&#x27;, &#x27;g&#x27;, &#x27;l&#x27;, &#x27;m&#x27;, &#x27;o&#x27;])</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" ><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">MeanMedianImputer</label><div class=\"sk-toggleable__content\"><pre>MeanMedianImputer(variables=[&#x27;b&#x27;, &#x27;c&#x27;, &#x27;d&#x27;, &#x27;f&#x27;, &#x27;l&#x27;, &#x27;m&#x27;])</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" ><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">CategoricalImputer</label><div class=\"sk-toggleable__content\"><pre>CategoricalImputer(fill_value=&#x27;missing&#x27;, variables=[&#x27;g&#x27;, &#x27;o&#x27;])</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-5\" type=\"checkbox\" ><label for=\"sk-estimator-id-5\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LogTransformer</label><div class=\"sk-toggleable__content\"><pre>LogTransformer(variables=[&#x27;c&#x27;, &#x27;monto&#x27;])</pre></div></div></div><div class=\"sk-item\"><div 
class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-6\" type=\"checkbox\" ><label for=\"sk-estimator-id-6\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">ArbitraryDiscretiser</label><div class=\"sk-toggleable__content\"><pre>ArbitraryDiscretiser(binning_dict={&#x27;e&#x27;: [-inf, 0, inf], &#x27;f&#x27;: [-inf, 0, inf]})</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-7\" type=\"checkbox\" ><label for=\"sk-estimator-id-7\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RareLabelEncoder</label><div class=\"sk-toggleable__content\"><pre>RareLabelEncoder(n_categories=1, tol=0.001, variables=[&#x27;g&#x27;, &#x27;j&#x27;, &#x27;o&#x27;, &#x27;p&#x27;])</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-8\" type=\"checkbox\" ><label for=\"sk-estimator-id-8\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">OrdinalEncoder</label><div class=\"sk-toggleable__content\"><pre>OrdinalEncoder(variables=[&#x27;g&#x27;, &#x27;j&#x27;, &#x27;o&#x27;, &#x27;p&#x27;])</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-9\" type=\"checkbox\" ><label for=\"sk-estimator-id-9\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">DatetimeFeatures</label><div class=\"sk-toggleable__content\"><pre>DatetimeFeatures(features_to_extract=&#x27;all&#x27;, variables=&#x27;fecha&#x27;)</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-10\" type=\"checkbox\" ><label for=\"sk-estimator-id-10\" class=\"sk-toggleable__label 
sk-toggleable__label-arrow\">ScalerDf</label><div class=\"sk-toggleable__content\"><pre>ScalerDf(method=&#x27;minmax&#x27;)</pre></div></div></div></div></div></div></div>"
+      ],
+      "text/plain": [
+       "Pipeline(steps=[('missing_indicator',\n",
+       "                 AddMissingIndicator(variables=['b', 'c', 'd', 'f', 'g', 'l',\n",
+       "                                                'm', 'o'])),\n",
+       "                ('numerical_imputer',\n",
+       "                 MeanMedianImputer(variables=['b', 'c', 'd', 'f', 'l', 'm'])),\n",
+       "                ('categorical_imputer',\n",
+       "                 CategoricalImputer(fill_value='missing',\n",
+       "                                    variables=['g', 'o'])),\n",
+       "                ('numerical_transformation',\n",
+       "                 LogTransformer(variables=['c', 'monto'])),\n",
+       "                ('binarizer',\n",
+       "                 ArbitraryDiscretiser(binning_dict={'e': [-inf, 0, inf],\n",
+       "                                                    'f': [-inf, 0, inf]})),\n",
+       "                ('rare_label_encoder',\n",
+       "                 RareLabelEncoder(n_categories=1, tol=0.001,\n",
+       "                                  variables=['g', 'j', 'o', 'p'])),\n",
+       "                ('ordinal_encoder',\n",
+       "                 OrdinalEncoder(variables=['g', 'j', 'o', 'p'])),\n",
+       "                ('datetime_features',\n",
+       "                 DatetimeFeatures(features_to_extract='all',\n",
+       "                                  variables='fecha')),\n",
+       "                ('scaler', ScalerDf(method='minmax'))])"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "fraud_pipeline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_train_transformed = fraud_pipeline.transform(X_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>a</th>\n",
+       "      <th>b</th>\n",
+       "      <th>c</th>\n",
+       "      <th>d</th>\n",
+       "      <th>e</th>\n",
+       "      <th>f</th>\n",
+       "      <th>g</th>\n",
+       "      <th>h</th>\n",
+       "      <th>j</th>\n",
+       "      <th>k</th>\n",
+       "      <th>...</th>\n",
+       "      <th>fecha_month_end</th>\n",
+       "      <th>fecha_quarter_start</th>\n",
+       "      <th>fecha_quarter_end</th>\n",
+       "      <th>fecha_year_start</th>\n",
+       "      <th>fecha_year_end</th>\n",
+       "      <th>fecha_leap_year</th>\n",
+       "      <th>fecha_days_in_month</th>\n",
+       "      <th>fecha_hour</th>\n",
+       "      <th>fecha_minute</th>\n",
+       "      <th>fecha_second</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>135569</th>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.5217</td>\n",
+       "      <td>0.635969</td>\n",
+       "      <td>0.02</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.714286</td>\n",
+       "      <td>0.620690</td>\n",
+       "      <td>0.458599</td>\n",
+       "      <td>0.636612</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.391304</td>\n",
+       "      <td>0.525424</td>\n",
+       "      <td>0.881356</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>78656</th>\n",
+       "      <td>0.333333</td>\n",
+       "      <td>0.7554</td>\n",
+       "      <td>0.684908</td>\n",
+       "      <td>0.02</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.428571</td>\n",
+       "      <td>0.137931</td>\n",
+       "      <td>0.133758</td>\n",
+       "      <td>0.633268</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.347826</td>\n",
+       "      <td>0.254237</td>\n",
+       "      <td>0.288136</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>87437</th>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.5437</td>\n",
+       "      <td>0.741337</td>\n",
+       "      <td>0.02</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.428571</td>\n",
+       "      <td>0.793103</td>\n",
+       "      <td>0.458599</td>\n",
+       "      <td>0.735751</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.391304</td>\n",
+       "      <td>0.050847</td>\n",
+       "      <td>0.338983</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>131674</th>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.7418</td>\n",
+       "      <td>0.633959</td>\n",
+       "      <td>1.00</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.714286</td>\n",
+       "      <td>0.155172</td>\n",
+       "      <td>0.458599</td>\n",
+       "      <td>0.529368</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.782609</td>\n",
+       "      <td>0.915254</td>\n",
+       "      <td>0.101695</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>45535</th>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.6463</td>\n",
+       "      <td>0.693916</td>\n",
+       "      <td>0.08</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.428571</td>\n",
+       "      <td>0.379310</td>\n",
+       "      <td>0.458599</td>\n",
+       "      <td>0.049208</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.913043</td>\n",
+       "      <td>0.406780</td>\n",
+       "      <td>0.508475</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>41993</th>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.8063</td>\n",
+       "      <td>0.831573</td>\n",
+       "      <td>0.06</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.714286</td>\n",
+       "      <td>0.155172</td>\n",
+       "      <td>0.312102</td>\n",
+       "      <td>0.164571</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.826087</td>\n",
+       "      <td>0.067797</td>\n",
+       "      <td>0.762712</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>97639</th>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.5046</td>\n",
+       "      <td>0.618473</td>\n",
+       "      <td>0.04</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.428571</td>\n",
+       "      <td>0.155172</td>\n",
+       "      <td>0.458599</td>\n",
+       "      <td>0.288001</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.826087</td>\n",
+       "      <td>0.169492</td>\n",
+       "      <td>0.186441</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>95939</th>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.7233</td>\n",
+       "      <td>0.686591</td>\n",
+       "      <td>0.02</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.714286</td>\n",
+       "      <td>0.034483</td>\n",
+       "      <td>0.866242</td>\n",
+       "      <td>0.585850</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.869565</td>\n",
+       "      <td>0.372881</td>\n",
+       "      <td>0.847458</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>117952</th>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.7824</td>\n",
+       "      <td>0.710351</td>\n",
+       "      <td>0.96</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.714286</td>\n",
+       "      <td>0.086207</td>\n",
+       "      <td>0.458599</td>\n",
+       "      <td>0.007728</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.406780</td>\n",
+       "      <td>0.779661</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>43567</th>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.7225</td>\n",
+       "      <td>0.468508</td>\n",
+       "      <td>1.00</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.714286</td>\n",
+       "      <td>0.051724</td>\n",
+       "      <td>0.458599</td>\n",
+       "      <td>0.617746</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.913043</td>\n",
+       "      <td>0.288136</td>\n",
+       "      <td>0.305085</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>135000 rows × 45 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "               a       b         c     d    e    f         g         h  \\\n",
+       "135569  1.000000  0.5217  0.635969  0.02  1.0  1.0  0.714286  0.620690   \n",
+       "78656   0.333333  0.7554  0.684908  0.02  0.0  1.0  0.428571  0.137931   \n",
+       "87437   1.000000  0.5437  0.741337  0.02  1.0  1.0  0.428571  0.793103   \n",
+       "131674  1.000000  0.7418  0.633959  1.00  1.0  1.0  0.714286  0.155172   \n",
+       "45535   1.000000  0.6463  0.693916  0.08  1.0  1.0  0.428571  0.379310   \n",
+       "...          ...     ...       ...   ...  ...  ...       ...       ...   \n",
+       "41993   1.000000  0.8063  0.831573  0.06  1.0  0.0  0.714286  0.155172   \n",
+       "97639   1.000000  0.5046  0.618473  0.04  0.0  1.0  0.428571  0.155172   \n",
+       "95939   1.000000  0.7233  0.686591  0.02  0.0  0.0  0.714286  0.034483   \n",
+       "117952  1.000000  0.7824  0.710351  0.96  1.0  1.0  0.714286  0.086207   \n",
+       "43567   1.000000  0.7225  0.468508  1.00  0.0  1.0  0.714286  0.051724   \n",
+       "\n",
+       "               j         k  ...  fecha_month_end  fecha_quarter_start  \\\n",
+       "135569  0.458599  0.636612  ...              0.0                  0.0   \n",
+       "78656   0.133758  0.633268  ...              0.0                  0.0   \n",
+       "87437   0.458599  0.735751  ...              0.0                  1.0   \n",
+       "131674  0.458599  0.529368  ...              0.0                  0.0   \n",
+       "45535   0.458599  0.049208  ...              0.0                  0.0   \n",
+       "...          ...       ...  ...              ...                  ...   \n",
+       "41993   0.312102  0.164571  ...              0.0                  0.0   \n",
+       "97639   0.458599  0.288001  ...              0.0                  0.0   \n",
+       "95939   0.866242  0.585850  ...              0.0                  0.0   \n",
+       "117952  0.458599  0.007728  ...              0.0                  0.0   \n",
+       "43567   0.458599  0.617746  ...              0.0                  0.0   \n",
+       "\n",
+       "        fecha_quarter_end  fecha_year_start  fecha_year_end  fecha_leap_year  \\\n",
+       "135569                0.0               0.0             0.0              0.0   \n",
+       "78656                 0.0               0.0             0.0              0.0   \n",
+       "87437                 0.0               0.0             0.0              0.0   \n",
+       "131674                0.0               0.0             0.0              0.0   \n",
+       "45535                 0.0               0.0             0.0              0.0   \n",
+       "...                   ...               ...             ...              ...   \n",
+       "41993                 0.0               0.0             0.0              0.0   \n",
+       "97639                 0.0               0.0             0.0              0.0   \n",
+       "95939                 0.0               0.0             0.0              0.0   \n",
+       "117952                0.0               0.0             0.0              0.0   \n",
+       "43567                 0.0               0.0             0.0              0.0   \n",
+       "\n",
+       "        fecha_days_in_month  fecha_hour  fecha_minute  fecha_second  \n",
+       "135569                  1.0    0.391304      0.525424      0.881356  \n",
+       "78656                   1.0    0.347826      0.254237      0.288136  \n",
+       "87437                   0.0    0.391304      0.050847      0.338983  \n",
+       "131674                  0.0    0.782609      0.915254      0.101695  \n",
+       "45535                   0.0    0.913043      0.406780      0.508475  \n",
+       "...                     ...         ...           ...           ...  \n",
+       "41993                   0.0    0.826087      0.067797      0.762712  \n",
+       "97639                   0.0    0.826087      0.169492      0.186441  \n",
+       "95939                   1.0    0.869565      0.372881      0.847458  \n",
+       "117952                  0.0    0.000000      0.406780      0.779661  \n",
+       "43567                   1.0    0.913043      0.288136      0.305085  \n",
+       "\n",
+       "[135000 rows x 45 columns]"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X_train_transformed"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sel = ProbeFeatureSelection(\n",
+    "    estimator=RandomForestClassifier(),\n",
+    "    scoring=\"roc_auc\",\n",
+    "    n_probes=3,\n",
+    "    distribution=\"all\",\n",
+    "    cv=3,\n",
+    "    random_state=150\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_tr = sel.fit_transform(X_train_transformed, y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(135000, 45) (135000, 13)\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(X_train_transformed.shape, X_tr.shape)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "selected_features = X_tr.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pd.Series(selected_features).to_csv('../data/processed/selected_features.csv', index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "fraud-detection",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  },
+  "orig_nbformat": 4,
+  "vscode": {
+   "interpreter": {
+    "hash": "45e631c81adbf0cb55b2526738ae1a14c53cfa3f28a6ae1bee5619daf3ab935d"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

notebooks/04-model _training.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

notebooks/__pycache__/utils.cpython-310.pyc ADDED Viewed

Binary file (1.19 kB). View file

notebooks/logs.log ADDED Viewed

	@@ -0,0 +1,808 @@

+2023-10-08 15:23:18,818:WARNING:
+'cuml' is a soft dependency and not included in the pycaret installation. Please run: `pip install cuml` to install.
+2023-10-08 15:23:18,819:WARNING:
+'cuml' is a soft dependency and not included in the pycaret installation. Please run: `pip install cuml` to install.
+2023-10-08 15:23:18,819:WARNING:
+'cuml' is a soft dependency and not included in the pycaret installation. Please run: `pip install cuml` to install.
+2023-10-08 15:23:18,819:WARNING:
+'cuml' is a soft dependency and not included in the pycaret installation. Please run: `pip install cuml` to install.
+2023-10-08 15:38:13,449:WARNING:
+'cuml' is a soft dependency and not included in the pycaret installation. Please run: `pip install cuml` to install.
+2023-10-08 15:38:13,449:WARNING:
+'cuml' is a soft dependency and not included in the pycaret installation. Please run: `pip install cuml` to install.
+2023-10-08 15:38:13,449:WARNING:
+'cuml' is a soft dependency and not included in the pycaret installation. Please run: `pip install cuml` to install.
+2023-10-08 15:38:13,449:WARNING:
+'cuml' is a soft dependency and not included in the pycaret installation. Please run: `pip install cuml` to install.
+2023-10-08 15:40:39,103:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+2023-10-08 15:40:41,463:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+2023-10-08 15:40:43,785:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+2023-10-08 15:40:46,764:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+2023-10-08 15:40:48,451:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+2023-10-08 15:40:51,170:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+2023-10-08 15:40:53,845:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+2023-10-08 15:40:56,184:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+2023-10-08 15:40:59,289:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+2023-10-08 15:41:02,358:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+2023-10-08 15:41:04,033:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+2023-10-08 15:49:53,461:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 15:49:53,461:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 15:49:53,462:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 15:49:53,463:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 15:51:35,340:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 15:51:35,340:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 15:51:35,341:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 15:51:35,342:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 15:51:35,411:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 15:51:35,411:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 15:51:35,413:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 15:51:35,413:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 15:51:35,498:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 15:51:35,498:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 15:51:35,501:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 15:51:35,501:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 15:53:16,848:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 15:53:16,848:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 15:53:16,849:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 15:53:16,849:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 15:53:16,922:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 15:53:16,922:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 15:53:16,923:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 15:53:16,923:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 15:53:17,013:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 15:53:17,013:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 15:53:17,014:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 15:53:17,014:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 15:54:59,320:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 15:54:59,320:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 15:54:59,321:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 15:54:59,321:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 15:54:59,383:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 15:54:59,383:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 15:54:59,384:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 15:54:59,385:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 15:54:59,453:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 15:54:59,453:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 15:54:59,454:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 15:54:59,454:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 15:56:41,633:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 15:56:41,633:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 15:56:41,635:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 15:56:41,635:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 15:56:41,710:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 15:56:41,710:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 15:56:41,712:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 15:56:41,712:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 15:56:41,800:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 15:56:41,800:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 15:56:41,802:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 15:56:41,802:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 15:58:23,396:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 15:58:23,396:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 15:58:23,398:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 15:58:23,398:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 15:58:23,460:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 15:58:23,460:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 15:58:23,461:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 15:58:23,462:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 15:58:23,549:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 15:58:23,549:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 15:58:23,551:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 15:58:23,551:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 16:00:04,973:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 16:00:04,973:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 16:00:04,975:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 16:00:04,975:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 16:00:05,073:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 16:00:05,073:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 16:00:05,075:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 16:00:05,075:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 16:00:05,163:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 16:00:05,163:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 16:00:05,165:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 16:00:05,165:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 16:01:42,577:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 16:01:42,577:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 16:01:42,579:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 16:01:42,579:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 16:01:42,643:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 16:01:42,643:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 16:01:42,645:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 16:01:42,645:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 16:01:42,725:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 16:01:42,725:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 16:01:42,726:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 16:01:42,726:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 16:03:23,976:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 16:03:23,977:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 16:03:23,978:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 16:03:23,979:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 16:03:24,050:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 16:03:24,050:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 16:03:24,051:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 16:03:24,051:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 16:03:24,153:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 16:03:24,153:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 16:03:24,154:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 16:03:24,154:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 16:05:05,773:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 16:05:05,773:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 16:05:05,775:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 16:05:05,775:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 16:05:05,839:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 16:05:05,839:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 16:05:05,841:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 16:05:05,841:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 16:05:05,917:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 16:05:05,918:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 16:05:05,919:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 16:05:05,919:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 16:06:47,939:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 16:06:47,940:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 16:06:47,941:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 16:06:47,941:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 16:06:47,995:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 16:06:47,995:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 16:06:47,996:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 16:06:47,996:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 16:06:48,040:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 16:06:48,040:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 16:06:48,041:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 16:06:48,042:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 16:08:29,796:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 16:08:29,796:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 16:08:29,798:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 16:08:29,798:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 17:48:44,232:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+2023-10-08 17:48:46,174:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+2023-10-08 17:48:48,326:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+2023-10-08 17:48:49,750:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+2023-10-08 17:48:52,626:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+2023-10-08 17:48:55,008:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):
+STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
+Increase the number of iterations (max_iter) or scale the data as shown in:
+    https://scikit-learn.org/stable/modules/preprocessing.html
+Please also refer to the documentation for alternative solver options:
+    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
+  n_iter_i = _check_optimize_result(
+2023-10-08 17:54:25,847:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 17:54:25,847:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 17:54:25,848:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 17:54:25,848:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 17:55:54,180:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 17:55:54,181:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 17:55:54,182:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 17:55:54,183:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 17:55:54,249:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 17:55:54,250:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 17:55:54,251:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 17:55:54,251:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 17:55:54,332:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 17:55:54,332:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 17:55:54,334:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 17:55:54,334:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 17:57:34,908:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 17:57:34,908:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 17:57:34,909:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 17:57:34,909:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 17:57:34,982:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 17:57:34,982:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 17:57:34,983:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 17:57:34,983:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 17:57:35,076:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 17:57:35,076:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 17:57:35,078:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 17:57:35,078:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 17:59:19,597:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 17:59:19,598:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 17:59:19,600:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 17:59:19,601:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 17:59:19,692:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 17:59:19,692:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 17:59:19,695:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 17:59:19,695:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 17:59:19,800:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 17:59:19,800:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 17:59:19,801:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 17:59:19,801:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 18:01:03,749:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 18:01:03,749:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 18:01:03,750:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 18:01:03,751:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 18:01:03,849:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 18:01:03,850:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 18:01:03,851:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 18:01:03,851:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 18:01:03,972:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 18:01:03,972:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 18:01:03,974:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 18:01:03,974:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 18:02:46,388:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 18:02:46,388:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 18:02:46,389:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 18:02:46,389:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 18:02:46,460:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 18:02:46,460:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 18:02:46,461:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 18:02:46,461:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 18:02:46,520:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 18:02:46,520:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 18:02:46,521:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 18:02:46,521:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 18:04:29,787:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 18:04:29,788:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 18:04:29,789:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 18:04:29,789:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 18:23:23,859:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 18:23:23,860:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 18:23:23,861:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 18:23:23,861:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 18:23:23,863:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:520: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(data):
+2023-10-08 18:25:09,016:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 18:25:09,016:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 18:25:09,019:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 18:25:09,019:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 20:08:20,046:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 20:08:20,055:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 20:08:20,057:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 20:08:20,058:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)
+2023-10-08 20:08:26,350:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:335: FutureWarning: is_sparse is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.SparseDtype)` instead.
+  if is_sparse(dtype):
+2023-10-08 20:08:26,351:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:338: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
+2023-10-08 20:08:26,352:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:384: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  if is_categorical_dtype(dtype):
+2023-10-08 20:08:26,352:WARNING:/home/wilmarsepulveda/anaconda3/envs/fraud-detection/lib/python3.10/site-packages/xgboost/data.py:359: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
+  return is_int or is_bool or is_float or is_categorical_dtype(dtype)

notebooks/utils.py ADDED Viewed

	@@ -0,0 +1,29 @@

+from sklearn.base import BaseEstimator, TransformerMixin
+from sklearn.preprocessing import MinMaxScaler, StandardScaler
+import pandas as pd
+class ScalerDf(BaseEstimator, TransformerMixin):
+    def __init__(self, method):
+        self.method = method
+    def transform(self, X):
+        X = pd.DataFrame(
+            self.scaler.transform(X),
+            columns=X.columns,
+            index=X.index
+        )
+        return X
+    def fit(self, X, y=None):
+        if self.method == 'minmax':
+            self.scaler = MinMaxScaler()
+        elif self.method == 'standard':
+            self.scaler = StandardScaler()
+        elif self.method == 'none':
+            return self
+        else:
+            raise ValueError("Invalid scaling method. Supported methods are 'minmax', 'standard', and 'none'.")
+        self.scaler.fit(X)
+        return self

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+## python 3.10
+pandas==2.1.1
+scikit-learn==1.3.1
+feature_engine==1.6.2
+xgboost==2.0.0
+gradio==3.35.2

src/__pycache__/utils.cpython-310.pyc ADDED Viewed

Binary file (1.19 kB). View file

src/app.py ADDED Viewed

	@@ -0,0 +1,70 @@

+import gradio as gr
+import pandas as pd
+import joblib
+import numpy as np
+import json
+data = pd.read_csv('data/MercadoLibre Data Scientist Technical Challenge - Dataset.csv')
+pipeline = joblib.load('models/final_pipeline.joblib')
+ls = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'o','p', 'fecha', 'monto', 'score']
+data = data[ls]
+def sentence_builder(a, b, c, d, e, f, g, h, j, k, l, m, n, o, p, fecha, monto, score):
+    ls = [a, b, c, d, e, f, g, h, j, k, l, m, n, o, p, fecha, monto, score]
+    df = pd.DataFrame(ls).T
+    df.columns = data.columns
+    df['a'] = df['a'].astype('int64')
+    df['b'] = df['b'].astype('float64')
+    df['c'] = df['c'].astype('float64')
+    df['d'] = df['d'].astype('float64')
+    df['e'] = df['e'].astype('float64')
+    df['f'] = df['f'].astype('float64')
+    df['g'] = df['g'].astype('object')
+    df['h'] = df['h'].astype('int64')
+    df['j'] = df['j'].astype('object')
+    df['k'] = df['k'].astype('float64')
+    df['l'] = df['l'].astype('float64')
+    df['m'] = df['m'].astype('float64')
+    df['n'] = df['n'].astype('int64')
+    df['o'] = df['o'].astype('object')
+    df['p'] = df['p'].astype('object')
+    df['fecha'] = df['fecha'].astype('object')
+    df['monto'] = df['monto'].astype('float64')
+    df['score'] = df['score'].astype('int64')
+    predict_proba = pipeline.predict_proba(df)[:, 1]
+    predict = np.where(predict_proba<0.05018921, 'No fraude', 'Fraude')
+    print(predict)
+    output = {'probability':str(predict_proba[0]),
+              'prediction':predict[0]}
+    print(output)
+    return json.dumps(output)
+demo = gr.Interface(
+    fn = sentence_builder,
+    inputs=[
+        gr.Number(value=4, label="a"),
+        gr.Number(value=0.5217, label="b"),
+        gr.Number(value=17889.0, label="c"),
+        gr.Number(value=1.0, label="d"),
+        gr.Number(value=0.2830350998, label="e"),
+        gr.Number(value=12.0, label="f"),
+        gr.Textbox(value="BR", label="g"),
+        gr.Number(value=36, label="h"),
+        gr.Textbox(value="cat_4744ece", label="j"),
+        gr.Number(value=0.6366103624, label="k"),
+        gr.Number(value=2470.0, label="l"),
+        gr.Number(value=308.0, label="m"),
+        gr.Number(value=1, label="n"),
+        gr.Textbox(value='Y', label="o"),
+        gr.Textbox(value="Y", label="p"),
+        gr.Textbox(value="2020-03-18 09:31:52", label="fecha"),
+        gr.Number(value=24.89, label="monto"),
+        gr.Number(value=93, label="score")
+        ],
+        outputs="json"
+        )
+if __name__ == "__main__":
+    demo.launch()

src/utils.py ADDED Viewed

	@@ -0,0 +1,29 @@

+from sklearn.base import BaseEstimator, TransformerMixin
+from sklearn.preprocessing import MinMaxScaler, StandardScaler
+import pandas as pd
+class ScalerDf(BaseEstimator, TransformerMixin):
+    def __init__(self, method):
+        self.method = method
+    def transform(self, X):
+        X = pd.DataFrame(
+            self.scaler.transform(X),
+            columns=X.columns,
+            index=X.index
+        )
+        return X
+    def fit(self, X, y=None):
+        if self.method == 'minmax':
+            self.scaler = MinMaxScaler()
+        elif self.method == 'standard':
+            self.scaler = StandardScaler()
+        elif self.method == 'none':
+            return self
+        else:
+            raise ValueError("Invalid scaling method. Supported methods are 'minmax', 'standard', and 'none'.")
+        self.scaler.fit(X)
+        return self