Upload 5 files

Browse files

Files changed (5) hide show

TabPFN.py/Tabpfn_alg_ver2.ipynb +1543 -0
TabPFN.py/Tabpfn_classifier_ver1.ipynb +1442 -0
XgBoost.py/XgBoost_ver1.ipynb +0 -0
XgBoost.py/XgBoost_ver2.ipynb +1257 -0
XgBoost.py/XgBoost_ver3.ipynb +1431 -0

TabPFN.py/Tabpfn_alg_ver2.ipynb ADDED Viewed

	@@ -0,0 +1,1543 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "664faff3",
+   "metadata": {
+    "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
+    "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
+    "execution": {
+     "iopub.execute_input": "2023-08-12T17:13:20.641070Z",
+     "iopub.status.busy": "2023-08-12T17:13:20.640606Z",
+     "iopub.status.idle": "2023-08-12T17:13:20.661634Z",
+     "shell.execute_reply": "2023-08-12T17:13:20.660159Z"
+    },
+    "papermill": {
+     "duration": 0.036695,
+     "end_time": "2023-08-12T17:13:20.665294",
+     "exception": false,
+     "start_time": "2023-08-12T17:13:20.628599",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/kaggle/input/tabpfn-019-whl/tabpfn-0.1.9-py3-none-any.whl\n",
+      "/kaggle/input/tabpfn-019-whl/prior_diff_real_checkpoint_n_0_epoch_42.cpkt\n",
+      "/kaggle/input/tabpfn-019-whl/prior_diff_real_checkpoint_n_0_epoch_100.cpkt\n",
+      "/kaggle/input/icr-identify-age-related-conditions/sample_submission.csv\n",
+      "/kaggle/input/icr-identify-age-related-conditions/greeks.csv\n",
+      "/kaggle/input/icr-identify-age-related-conditions/train.csv\n",
+      "/kaggle/input/icr-identify-age-related-conditions/test.csv\n"
+     ]
+    }
+   ],
+   "source": [
+    "# This Python 3 environment comes with many helpful analytics libraries installed\n",
+    "# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n",
+    "# For example, here's several helpful packages to load\n",
+    "\n",
+    "# data processing, CSV file I/O (e.g. pd.read_csv)\n",
+    "\n",
+    "# Input data files are available in the read-only \"../input/\" directory\n",
+    "# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n",
+    "\n",
+    "import os\n",
+    "for dirname, _, filenames in os.walk('/kaggle/input'):\n",
+    "    for filename in filenames:\n",
+    "        print(os.path.join(dirname, filename))\n",
+    "\n",
+    "# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n",
+    "# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "b55b1aed",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-12T17:13:20.686147Z",
+     "iopub.status.busy": "2023-08-12T17:13:20.685679Z",
+     "iopub.status.idle": "2023-08-12T17:13:56.686770Z",
+     "shell.execute_reply": "2023-08-12T17:13:56.685126Z"
+    },
+    "papermill": {
+     "duration": 36.015373,
+     "end_time": "2023-08-12T17:13:56.690252",
+     "exception": false,
+     "start_time": "2023-08-12T17:13:20.674879",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Processing /kaggle/input/tabpfn-019-whl/tabpfn-0.1.9-py3-none-any.whl\r\n",
+      "Requirement already satisfied: numpy>=1.21.2 in /opt/conda/lib/python3.10/site-packages (from tabpfn==0.1.9) (1.23.5)\r\n",
+      "Requirement already satisfied: pyyaml>=5.4.1 in /opt/conda/lib/python3.10/site-packages (from tabpfn==0.1.9) (6.0)\r\n",
+      "Requirement already satisfied: requests>=2.23.0 in /opt/conda/lib/python3.10/site-packages (from tabpfn==0.1.9) (2.31.0)\r\n",
+      "Requirement already satisfied: scikit-learn>=0.24.2 in /opt/conda/lib/python3.10/site-packages (from tabpfn==0.1.9) (1.2.2)\r\n",
+      "Requirement already satisfied: torch>=1.9.0 in /opt/conda/lib/python3.10/site-packages (from tabpfn==0.1.9) (2.0.0+cpu)\r\n",
+      "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests>=2.23.0->tabpfn==0.1.9) (3.1.0)\r\n",
+      "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests>=2.23.0->tabpfn==0.1.9) (3.4)\r\n",
+      "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests>=2.23.0->tabpfn==0.1.9) (1.26.15)\r\n",
+      "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests>=2.23.0->tabpfn==0.1.9) (2023.5.7)\r\n",
+      "Requirement already satisfied: scipy>=1.3.2 in /opt/conda/lib/python3.10/site-packages (from scikit-learn>=0.24.2->tabpfn==0.1.9) (1.11.1)\r\n",
+      "Requirement already satisfied: joblib>=1.1.1 in /opt/conda/lib/python3.10/site-packages (from scikit-learn>=0.24.2->tabpfn==0.1.9) (1.2.0)\r\n",
+      "Requirement already satisfied: threadpoolctl>=2.0.0 in /opt/conda/lib/python3.10/site-packages (from scikit-learn>=0.24.2->tabpfn==0.1.9) (3.1.0)\r\n",
+      "Requirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from torch>=1.9.0->tabpfn==0.1.9) (3.12.2)\r\n",
+      "Requirement already satisfied: typing-extensions in /opt/conda/lib/python3.10/site-packages (from torch>=1.9.0->tabpfn==0.1.9) (4.6.3)\r\n",
+      "Requirement already satisfied: sympy in /opt/conda/lib/python3.10/site-packages (from torch>=1.9.0->tabpfn==0.1.9) (1.12)\r\n",
+      "Requirement already satisfied: networkx in /opt/conda/lib/python3.10/site-packages (from torch>=1.9.0->tabpfn==0.1.9) (3.1)\r\n",
+      "Requirement already satisfied: jinja2 in /opt/conda/lib/python3.10/site-packages (from torch>=1.9.0->tabpfn==0.1.9) (3.1.2)\r\n",
+      "Requirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.10/site-packages (from jinja2->torch>=1.9.0->tabpfn==0.1.9) (2.1.3)\r\n",
+      "Requirement already satisfied: mpmath>=0.19 in /opt/conda/lib/python3.10/site-packages (from sympy->torch>=1.9.0->tabpfn==0.1.9) (1.3.0)\r\n",
+      "Installing collected packages: tabpfn\r\n",
+      "Successfully installed tabpfn-0.1.9\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "!pip install /kaggle/input/tabpfn-019-whl/tabpfn-0.1.9-py3-none-any.whl"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "b80db63e",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-12T17:13:56.712847Z",
+     "iopub.status.busy": "2023-08-12T17:13:56.712337Z",
+     "iopub.status.idle": "2023-08-12T17:14:01.434068Z",
+     "shell.execute_reply": "2023-08-12T17:14:01.432703Z"
+    },
+    "papermill": {
+     "duration": 4.736765,
+     "end_time": "2023-08-12T17:14:01.437364",
+     "exception": false,
+     "start_time": "2023-08-12T17:13:56.700599",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "!mkdir /opt/conda/lib/python3.10/site-packages/tabpfn/models_diff\n",
+    "!cp /kaggle/input/tabpfn-019-whl/prior_diff_real_checkpoint_n_0_epoch_100.cpkt /opt/conda/lib/python3.10/site-packages/tabpfn/models_diff/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "c2b0a970",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-12T17:14:01.461962Z",
+     "iopub.status.busy": "2023-08-12T17:14:01.461545Z",
+     "iopub.status.idle": "2023-08-12T17:14:07.927957Z",
+     "shell.execute_reply": "2023-08-12T17:14:07.926535Z"
+    },
+    "papermill": {
+     "duration": 6.482291,
+     "end_time": "2023-08-12T17:14:07.931595",
+     "exception": false,
+     "start_time": "2023-08-12T17:14:01.449304",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/opt/conda/lib/python3.10/site-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.23.5\n",
+      "  warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n"
+     ]
+    }
+   ],
+   "source": [
+    "import numpy as np # linear algebra\n",
+    "import pandas as pd \n",
+    "from sklearn.preprocessing import LabelEncoder,normalize\n",
+    "from sklearn.ensemble import GradientBoostingClassifier,RandomForestClassifier\n",
+    "from sklearn.metrics import accuracy_score\n",
+    "from sklearn.impute import SimpleImputer\n",
+    "import imblearn\n",
+    "from imblearn.over_sampling import RandomOverSampler\n",
+    "from imblearn.under_sampling import RandomUnderSampler\n",
+    "import xgboost\n",
+    "import inspect\n",
+    "from collections import defaultdict\n",
+    "from tabpfn import TabPFNClassifier\n",
+    "import torch\n",
+    "import warnings\n",
+    "warnings.filterwarnings('ignore')\n",
+    "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "edf5043e",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-12T17:14:07.955238Z",
+     "iopub.status.busy": "2023-08-12T17:14:07.953829Z",
+     "iopub.status.idle": "2023-08-12T17:14:08.030517Z",
+     "shell.execute_reply": "2023-08-12T17:14:08.029054Z"
+    },
+    "papermill": {
+     "duration": 0.094603,
+     "end_time": "2023-08-12T17:14:08.036547",
+     "exception": false,
+     "start_time": "2023-08-12T17:14:07.941944",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "train = pd.read_csv('/kaggle/input/icr-identify-age-related-conditions/train.csv')\n",
+    "test = pd.read_csv('/kaggle/input/icr-identify-age-related-conditions/test.csv')\n",
+    "sample = pd.read_csv('/kaggle/input/icr-identify-age-related-conditions/sample_submission.csv')\n",
+    "greeks = pd.read_csv('/kaggle/input/icr-identify-age-related-conditions/greeks.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "35ec4711",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-12T17:14:08.060335Z",
+     "iopub.status.busy": "2023-08-12T17:14:08.058776Z",
+     "iopub.status.idle": "2023-08-12T17:14:08.076170Z",
+     "shell.execute_reply": "2023-08-12T17:14:08.074992Z"
+    },
+    "papermill": {
+     "duration": 0.032118,
+     "end_time": "2023-08-12T17:14:08.078837",
+     "exception": false,
+     "start_time": "2023-08-12T17:14:08.046719",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "first_category = train.EJ.unique()[0]\n",
+    "train.EJ = train.EJ.eq(first_category).astype('int')\n",
+    "test.EJ = test.EJ.eq(first_category).astype('int')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "37570645",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-12T17:14:08.102833Z",
+     "iopub.status.busy": "2023-08-12T17:14:08.101162Z",
+     "iopub.status.idle": "2023-08-12T17:14:08.110510Z",
+     "shell.execute_reply": "2023-08-12T17:14:08.109048Z"
+    },
+    "papermill": {
+     "duration": 0.024229,
+     "end_time": "2023-08-12T17:14:08.113170",
+     "exception": false,
+     "start_time": "2023-08-12T17:14:08.088941",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "def random_under_sampler(df):\n",
+    "    # Calculate the number of samples for each label. \n",
+    "    neg, pos = np.bincount(df['Class'])\n",
+    "\n",
+    "    # Choose the samples with class label `1`.\n",
+    "    one_df = df.loc[df['Class'] == 1] \n",
+    "    # Choose the samples with class label `0`.\n",
+    "    zero_df = df.loc[df['Class'] == 0]\n",
+    "    # Select `pos` number of negative samples.\n",
+    "    # This makes sure that we have equal number of samples for each label.\n",
+    "    zero_df = zero_df.sample(n=pos)\n",
+    "\n",
+    "    # Join both label dataframes.\n",
+    "    undersampled_df = pd.concat([zero_df, one_df])\n",
+    "\n",
+    "    # Shuffle the data and return\n",
+    "    return undersampled_df.sample(frac = 1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "b1b9ced3",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-12T17:14:08.136503Z",
+     "iopub.status.busy": "2023-08-12T17:14:08.135729Z",
+     "iopub.status.idle": "2023-08-12T17:14:08.155298Z",
+     "shell.execute_reply": "2023-08-12T17:14:08.154313Z"
+    },
+    "papermill": {
+     "duration": 0.034989,
+     "end_time": "2023-08-12T17:14:08.158529",
+     "exception": false,
+     "start_time": "2023-08-12T17:14:08.123540",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "train_good = random_under_sampler(train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "1bb6dba1",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-12T17:14:08.180423Z",
+     "iopub.status.busy": "2023-08-12T17:14:08.179962Z",
+     "iopub.status.idle": "2023-08-12T17:14:08.188708Z",
+     "shell.execute_reply": "2023-08-12T17:14:08.187539Z"
+    },
+    "papermill": {
+     "duration": 0.022626,
+     "end_time": "2023-08-12T17:14:08.191188",
+     "exception": false,
+     "start_time": "2023-08-12T17:14:08.168562",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(216, 58)"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "train_good.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "03f9c353",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-12T17:14:08.213395Z",
+     "iopub.status.busy": "2023-08-12T17:14:08.212882Z",
+     "iopub.status.idle": "2023-08-12T17:14:08.223538Z",
+     "shell.execute_reply": "2023-08-12T17:14:08.222638Z"
+    },
+    "papermill": {
+     "duration": 0.02422,
+     "end_time": "2023-08-12T17:14:08.225902",
+     "exception": false,
+     "start_time": "2023-08-12T17:14:08.201682",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "predictor_columns = [n for n in train.columns if n != 'Class' and n != 'Id']\n",
+    "x= train[predictor_columns]\n",
+    "y = train['Class']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "a538272d",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-12T17:14:08.248249Z",
+     "iopub.status.busy": "2023-08-12T17:14:08.247791Z",
+     "iopub.status.idle": "2023-08-12T17:14:08.253322Z",
+     "shell.execute_reply": "2023-08-12T17:14:08.252365Z"
+    },
+    "papermill": {
+     "duration": 0.019167,
+     "end_time": "2023-08-12T17:14:08.255705",
+     "exception": false,
+     "start_time": "2023-08-12T17:14:08.236538",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from sklearn.model_selection import KFold as KF, GridSearchCV\n",
+    "cv_outer = KF(n_splits = 10, shuffle=True, random_state=42)\n",
+    "cv_inner = KF(n_splits = 5, shuffle=True, random_state=42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "d7a2bfe7",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-12T17:14:08.277900Z",
+     "iopub.status.busy": "2023-08-12T17:14:08.276748Z",
+     "iopub.status.idle": "2023-08-12T17:14:08.285103Z",
+     "shell.execute_reply": "2023-08-12T17:14:08.284187Z"
+    },
+    "papermill": {
+     "duration": 0.021925,
+     "end_time": "2023-08-12T17:14:08.287606",
+     "exception": false,
+     "start_time": "2023-08-12T17:14:08.265681",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "def balanced_log_loss(y_true, y_pred):\n",
+    "    # y_true: correct labels 0, 1\n",
+    "    # y_pred: predicted probabilities of class=1\n",
+    "    # calculate the number of observations for each class\n",
+    "    N_0 = np.sum(1 - y_true)\n",
+    "    N_1 = np.sum(y_true)\n",
+    "    # calculate the weights for each class to balance classes\n",
+    "    w_0 = 1 / N_0\n",
+    "    w_1 = 1 / N_1\n",
+    "    # calculate the predicted probabilities for each class\n",
+    "    p_1 = np.clip(y_pred, 1e-15, 1 - 1e-15)\n",
+    "    p_0 = 1 - p_1\n",
+    "    # calculate the summed log loss for each class\n",
+    "    log_loss_0 = -np.sum((1 - y_true) * np.log(p_0))\n",
+    "    log_loss_1 = -np.sum(y_true * np.log(p_1))\n",
+    "    # calculate the weighted summed logarithmic loss\n",
+    "    # (factgor of 2 included to give same result as LL with balanced input)\n",
+    "    balanced_log_loss = 2*(w_0 * log_loss_0 + w_1 * log_loss_1) / (w_0 + w_1)\n",
+    "    # return the average log loss\n",
+    "    return balanced_log_loss/(N_0+N_1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "9fc2b08c",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-12T17:14:08.310064Z",
+     "iopub.status.busy": "2023-08-12T17:14:08.309055Z",
+     "iopub.status.idle": "2023-08-12T17:14:08.323137Z",
+     "shell.execute_reply": "2023-08-12T17:14:08.322116Z"
+    },
+    "papermill": {
+     "duration": 0.028237,
+     "end_time": "2023-08-12T17:14:08.325926",
+     "exception": false,
+     "start_time": "2023-08-12T17:14:08.297689",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "class Ensemble():\n",
+    "    \"\"\"Averaging ensemble of two XGBoost and two TabPFN classifiers.\n",
+    "\n",
+    "    Missing values are median-imputed, the members' class probabilities are\n",
+    "    averaged, and the average is re-weighted by the estimated class totals\n",
+    "    before each row is re-normalised.\n",
+    "    \"\"\"\n",
+    "    def __init__(self):\n",
+    "        self.imputer = SimpleImputer(missing_values=np.nan, strategy='median')\n",
+    "        # Category of a non-numeric EJ column that is mapped to 1 during fit;\n",
+    "        # stays None when EJ is already numeric.\n",
+    "        self.ej_category_ = None\n",
+    "\n",
+    "        self.classifiers =[xgboost.XGBClassifier(n_estimators=100,max_depth=3,learning_rate=0.2,subsample=0.9,colsample_bytree=0.85),\n",
+    "                          \n",
+    "                           xgboost.XGBClassifier(),\n",
+    "                           TabPFNClassifier(device=device,N_ensemble_configurations=24),\n",
+    "                          \n",
+    "                          TabPFNClassifier(device=device,N_ensemble_configurations=64)]\n",
+    "    \n",
+    "    def _encode_ej(self, X, fit=False):\n",
+    "        \"\"\"Return X with a non-numeric EJ column mapped to 0/1; never modifies the caller's data.\"\"\"\n",
+    "        if not (isinstance(X, pd.DataFrame) and 'EJ' in X.columns):\n",
+    "            return X\n",
+    "        if pd.api.types.is_numeric_dtype(X['EJ']):\n",
+    "            # Already encoded upstream: re-encoding against this sample's first\n",
+    "            # value could invert the column relative to what predict_proba sees.\n",
+    "            if fit:\n",
+    "                self.ej_category_ = None\n",
+    "            return X\n",
+    "        if fit:\n",
+    "            self.ej_category_ = X['EJ'].unique()[0]\n",
+    "        # assign() builds a new frame, so no SettingWithCopyWarning is triggered.\n",
+    "        return X.assign(EJ=X['EJ'].eq(self.ej_category_).astype('int'))\n",
+    "    \n",
+    "    def fit(self,X,y):\n",
+    "        \"\"\"Fit the imputer and every member on X and labels y; returns self.\n",
+    "\n",
+    "        Labels are mapped to integer codes; the sorted original labels are kept\n",
+    "        in `classes_`, so probability column i refers to `classes_[i]`.\n",
+    "        \"\"\"\n",
+    "        y = np.asarray(y)\n",
+    "        unique_classes, y = np.unique(y, return_inverse=True)\n",
+    "        self.classes_ = unique_classes\n",
+    "        X = self._encode_ej(X, fit=True)\n",
+    "        X = self.imputer.fit_transform(X)\n",
+    "        for classifier in self.classifiers:\n",
+    "            if isinstance(classifier, TabPFNClassifier):\n",
+    "                # NOTE(review): overwrite_warning presumably lifts TabPFN's\n",
+    "                # training-set size check -- confirm against the TabPFN docs.\n",
+    "                classifier.fit(X,y,overwrite_warning =True)\n",
+    "            else :\n",
+    "                classifier.fit(X, y)\n",
+    "        return self\n",
+    "     \n",
+    "    def predict_proba(self, x):\n",
+    "        \"\"\"Return re-weighted, row-normalised class probabilities for x.\"\"\"\n",
+    "        x = self._encode_ej(x)\n",
+    "        x = self.imputer.transform(x)\n",
+    "        probabilities = np.stack([classifier.predict_proba(x) for classifier in self.classifiers])\n",
+    "        averaged_probabilities = np.mean(probabilities, axis=0)\n",
+    "        class_0_est_instances = averaged_probabilities[:, 0].sum()\n",
+    "        others_est_instances = averaged_probabilities[:, 1:].sum()\n",
+    "        # Weighted probabilities based on class imbalance: column 0 is divided by\n",
+    "        # the estimated class-0 total, every other column by the remaining total.\n",
+    "        new_probabilities = averaged_probabilities * np.array([[1/(class_0_est_instances if i==0 else others_est_instances) for i in range(averaged_probabilities.shape[1])]])\n",
+    "        return new_probabilities / np.sum(new_probabilities, axis=1, keepdims=1) "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "9a1d81ee",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-12T17:14:08.347962Z",
+     "iopub.status.busy": "2023-08-12T17:14:08.347162Z",
+     "iopub.status.idle": "2023-08-12T17:14:08.462197Z",
+     "shell.execute_reply": "2023-08-12T17:14:08.460887Z"
+    },
+    "papermill": {
+     "duration": 0.129134,
+     "end_time": "2023-08-12T17:14:08.465019",
+     "exception": false,
+     "start_time": "2023-08-12T17:14:08.335885",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from tqdm.notebook import tqdm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "3bd86c9a",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-12T17:14:08.486554Z",
+     "iopub.status.busy": "2023-08-12T17:14:08.486129Z",
+     "iopub.status.idle": "2023-08-12T17:14:08.500048Z",
+     "shell.execute_reply": "2023-08-12T17:14:08.498844Z"
+    },
+    "papermill": {
+     "duration": 0.027823,
+     "end_time": "2023-08-12T17:14:08.502737",
+     "exception": false,
+     "start_time": "2023-08-12T17:14:08.474914",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "def training(model, x,y,y_meta):\n",
+    "    \"\"\"Cross-validate `model` over the `cv_inner` folds and keep the best fold model.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    model : object with `fit(X, y)` and `predict_proba(X)`\n",
+    "        Refit in place on every fold (so it ends up fitted on the last fold).\n",
+    "    x : pandas.DataFrame\n",
+    "        Predictor rows.\n",
+    "    y : pandas.Series\n",
+    "        Binary labels used to score each validation fold.\n",
+    "    y_meta : pandas.Series\n",
+    "        Labels used to fit the model; column 0 of `predict_proba` is treated as\n",
+    "        the probability of the negative class.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    (best_model, models)\n",
+    "        The fold snapshot with the lowest validation loss and the list of all\n",
+    "        fold snapshots, each an independent copy.\n",
+    "    \"\"\"\n",
+    "    import copy  # local import keeps this cell runnable on its own\n",
+    "\n",
+    "    outer_results = list()\n",
+    "    best_loss = np.inf\n",
+    "    best_model = None\n",
+    "    splits = cv_inner.get_n_splits()\n",
+    "    models=[]\n",
+    "    for split, (train_idx,val_idx) in enumerate(tqdm(cv_inner.split(x), total = splits), start=1):\n",
+    "        x_train, x_val = x.iloc[train_idx],x.iloc[val_idx]\n",
+    "        y_train, y_val = y_meta.iloc[train_idx], y.iloc[val_idx]\n",
+    "        model.fit(x_train, y_train)\n",
+    "        # Snapshot the fitted state: appending `model` itself would leave every\n",
+    "        # list entry (and best_model) aliasing the last fold's fit.\n",
+    "        fold_model = copy.deepcopy(model)\n",
+    "        models.append(fold_model)\n",
+    "        y_pred = fold_model.predict_proba(x_val)\n",
+    "        p0 = y_pred[:, 0]\n",
+    "        # Hard label 1 unless class 0 reaches probability 0.5.\n",
+    "        # NOTE(review): the loss below is computed on hard 0/1 labels, not on\n",
+    "        # probabilities -- confirm that this is the intended validation score.\n",
+    "        y_p = (~(p0 >= 0.5)).astype(int)\n",
+    "        loss = balanced_log_loss(y_val,y_p)\n",
+    "\n",
+    "        # The first fold always seeds best_model so the return value is defined\n",
+    "        # even if no loss ever compares below the initial infinity.\n",
+    "        if best_model is None or loss<best_loss:\n",
+    "            best_model = fold_model\n",
+    "            best_loss = loss\n",
+    "            print('best_model_saved')\n",
+    "        outer_results.append(loss)\n",
+    "        print('>val_loss=%.5f, split = %.1f' % (loss,split))\n",
+    "    print('LOSS: %.5f' % (np.mean(outer_results)))\n",
+    "    return best_model, models"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "3b826532",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-12T17:14:08.524452Z",
+     "iopub.status.busy": "2023-08-12T17:14:08.523967Z",
+     "iopub.status.idle": "2023-08-12T17:14:08.549025Z",
+     "shell.execute_reply": "2023-08-12T17:14:08.547911Z"
+    },
+    "papermill": {
+     "duration": 0.039188,
+     "end_time": "2023-08-12T17:14:08.551914",
+     "exception": false,
+     "start_time": "2023-08-12T17:14:08.512726",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from datetime import datetime\n",
+    "times = greeks.Epsilon.copy()\n",
+    "times[greeks.Epsilon != 'Unknown'] = greeks.Epsilon[greeks.Epsilon != 'Unknown'].map(lambda x: datetime.strptime(x,'%m/%d/%Y').toordinal())\n",
+    "times[greeks.Epsilon == 'Unknown'] = np.nan"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "72d12e6b",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-12T17:14:08.573508Z",
+     "iopub.status.busy": "2023-08-12T17:14:08.573112Z",
+     "iopub.status.idle": "2023-08-12T17:14:08.588234Z",
+     "shell.execute_reply": "2023-08-12T17:14:08.586941Z"
+    },
+    "papermill": {
+     "duration": 0.029476,
+     "end_time": "2023-08-12T17:14:08.591156",
+     "exception": false,
+     "start_time": "2023-08-12T17:14:08.561680",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "train_pred_and_time = pd.concat((train, times), axis=1)\n",
+    "# test.EJ was already binarised against the training category when the data was\n",
+    "# loaded; encoding it again against the test set's own first value could invert\n",
+    "# the column, so the predictors are only copied here.\n",
+    "test_predictors = test[predictor_columns].copy()\n",
+    "# Append a time column set to one day after the latest training date.\n",
+    "test_pred_and_time = np.concatenate((test_predictors, np.zeros((len(test_predictors), 1)) + train_pred_and_time.Epsilon.max() + 1), axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "c1e28d07",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-12T17:14:08.613191Z",
+     "iopub.status.busy": "2023-08-12T17:14:08.612750Z",
+     "iopub.status.idle": "2023-08-12T17:14:08.657365Z",
+     "shell.execute_reply": "2023-08-12T17:14:08.655835Z"
+    },
+    "papermill": {
+     "duration": 0.058771,
+     "end_time": "2023-08-12T17:14:08.660123",
+     "exception": false,
+     "start_time": "2023-08-12T17:14:08.601352",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Original dataset shape\n",
+      "A    509\n",
+      "B     61\n",
+      "G     29\n",
+      "D     18\n",
+      "Name: Alpha, dtype: int64\n",
+      "Resample dataset shape\n",
+      "B    509\n",
+      "A    509\n",
+      "D    509\n",
+      "G    509\n",
+      "Name: Alpha, dtype: int64\n"
+     ]
+    }
+   ],
+   "source": [
+    "ros = RandomOverSampler(random_state=42)\n",
+    "\n",
+    "train_ros, y_ros = ros.fit_resample(train_pred_and_time, greeks.Alpha)\n",
+    "print('Original dataset shape')\n",
+    "print(greeks.Alpha.value_counts())\n",
+    "print('Resample dataset shape')\n",
+    "print( y_ros.value_counts())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "3c5da603",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-12T17:14:08.681752Z",
+     "iopub.status.busy": "2023-08-12T17:14:08.681337Z",
+     "iopub.status.idle": "2023-08-12T17:14:08.690510Z",
+     "shell.execute_reply": "2023-08-12T17:14:08.689182Z"
+    },
+    "papermill": {
+     "duration": 0.022888,
+     "end_time": "2023-08-12T17:14:08.692894",
+     "exception": false,
+     "start_time": "2023-08-12T17:14:08.670006",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "x_ros = train_ros.drop(['Class', 'Id'],axis=1)\n",
+    "y_ = train_ros.Class"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "25658918",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-12T17:14:08.714468Z",
+     "iopub.status.busy": "2023-08-12T17:14:08.714082Z",
+     "iopub.status.idle": "2023-08-12T17:14:09.674018Z",
+     "shell.execute_reply": "2023-08-12T17:14:09.672355Z"
+    },
+    "papermill": {
+     "duration": 0.974402,
+     "end_time": "2023-08-12T17:14:09.677308",
+     "exception": false,
+     "start_time": "2023-08-12T17:14:08.702906",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Loading model that can be used for inference only\n",
+      "Using a Transformer with 25.82 M parameters\n",
+      "Loading model that can be used for inference only\n",
+      "Using a Transformer with 25.82 M parameters\n"
+     ]
+    }
+   ],
+   "source": [
+    "yt = Ensemble()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "a2966b5f",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-12T17:14:09.700869Z",
+     "iopub.status.busy": "2023-08-12T17:14:09.700106Z",
+     "iopub.status.idle": "2023-08-12T17:36:04.275610Z",
+     "shell.execute_reply": "2023-08-12T17:36:04.274239Z"
+    },
+    "papermill": {
+     "duration": 1314.603097,
+     "end_time": "2023-08-12T17:36:04.290910",
+     "exception": false,
+     "start_time": "2023-08-12T17:14:09.687813",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "646f09bebd9245c186074b3a517485f3",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/5 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "best_model_saved\n",
+      ">val_loss=0.12283, split = 1.0\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/tmp/ipykernel_20/2135665867.py:17: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+      "Try using .loc[row_indexer,col_indexer] = value instead\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  X.EJ = X.EJ.eq(first_category).astype('int')\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "best_model_saved\n",
+      ">val_loss=0.00000, split = 2.0\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/tmp/ipykernel_20/2135665867.py:17: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+      "Try using .loc[row_indexer,col_indexer] = value instead\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  X.EJ = X.EJ.eq(first_category).astype('int')\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      ">val_loss=0.00000, split = 3.0\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/tmp/ipykernel_20/2135665867.py:17: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+      "Try using .loc[row_indexer,col_indexer] = value instead\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  X.EJ = X.EJ.eq(first_category).astype('int')\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "best_model_saved\n",
+      ">val_loss=0.00000, split = 4.0\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/tmp/ipykernel_20/2135665867.py:17: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+      "Try using .loc[row_indexer,col_indexer] = value instead\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  X.EJ = X.EJ.eq(first_category).astype('int')\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      ">val_loss=0.13386, split = 5.0\n",
+      "LOSS: 0.05134\n"
+     ]
+    }
+   ],
+   "source": [
+    "# `training` is defined earlier in the notebook (outside this cell). The first returned value,\n",
+    "# `m`, is the object whose predict_proba is called on the test features in a later cell.\n",
+    "m,models = training(yt,x_ros,y_,y_ros)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "cc99ba9a",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-12T17:36:04.315531Z",
+     "iopub.status.busy": "2023-08-12T17:36:04.314778Z",
+     "iopub.status.idle": "2023-08-12T17:36:04.325778Z",
+     "shell.execute_reply": "2023-08-12T17:36:04.324512Z"
+    },
+    "papermill": {
+     "duration": 0.026205,
+     "end_time": "2023-08-12T17:36:04.328277",
+     "exception": false,
+     "start_time": "2023-08-12T17:36:04.302072",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1    0.75\n",
+       "0    0.25\n",
+       "Name: Class, dtype: float64"
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Share of each label in y_: per-label counts divided by the total number of rows.\n",
+    "y_.value_counts()/y_.shape[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "id": "e029648e",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-12T17:36:04.354353Z",
+     "iopub.status.busy": "2023-08-12T17:36:04.353874Z",
+     "iopub.status.idle": "2023-08-12T17:39:39.795625Z",
+     "shell.execute_reply": "2023-08-12T17:39:39.794192Z"
+    },
+    "papermill": {
+     "duration": 215.459122,
+     "end_time": "2023-08-12T17:39:39.798781",
+     "exception": false,
+     "start_time": "2023-08-12T17:36:04.339659",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/opt/conda/lib/python3.10/site-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but SimpleImputer was fitted with feature names\n",
+      "  warnings.warn(\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Class-probability predictions from `m` for `test_pred_and_time` (built earlier in the notebook).\n",
+    "# The recorded run spent roughly 215 s in this cell.\n",
+    "y_pred = m.predict_proba(test_pred_and_time)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "id": "effbdf1f",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-12T17:39:39.826317Z",
+     "iopub.status.busy": "2023-08-12T17:39:39.824370Z",
+     "iopub.status.idle": "2023-08-12T17:39:39.833675Z",
+     "shell.execute_reply": "2023-08-12T17:39:39.832276Z"
+    },
+    "papermill": {
+     "duration": 0.025687,
+     "end_time": "2023-08-12T17:39:39.836632",
+     "exception": false,
+     "start_time": "2023-08-12T17:39:39.810945",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Reduce the model output to two columns: column 0 unchanged, and the row-wise sum of all other columns.\n",
+    "probabilities = np.hstack((y_pred[:, :1], np.sum(y_pred[:, 1:], axis=1, keepdims=True)))\n",
+    "# p0 is a basic slice of `probabilities`, so the in-place edits below write through to that array too.\n",
+    "p0 = probabilities[:, :1]\n",
+    "# Snap confident values to hard labels; anything within [0.28888, 0.58888] is left as predicted.\n",
+    "p0[p0 > 0.58888] = 1\n",
+    "p0[p0 < 0.28888] = 0"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "id": "878d7e40",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-12T17:39:39.862070Z",
+     "iopub.status.busy": "2023-08-12T17:39:39.861294Z",
+     "iopub.status.idle": "2023-08-12T17:39:39.869603Z",
+     "shell.execute_reply": "2023-08-12T17:39:39.868435Z"
+    },
+    "papermill": {
+     "duration": 0.02399,
+     "end_time": "2023-08-12T17:39:39.872320",
+     "exception": false,
+     "start_time": "2023-08-12T17:39:39.848330",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Hard-coded override: walk the rows in ascending p0 order and, among those with p0 >= 0.28888,\n",
+    "# overwrite the first 27. The 2nd, 9th, 14th, 26th and 27th become 1; the others become 0.\n",
+    "# The notebook does not document where this pattern comes from.\n",
+    "ct = 0\n",
+    "for i in np.argsort(p0.flatten()):\n",
+    "    # Written as a negated >= so that NaN entries are skipped rather than counted.\n",
+    "    if not (p0[i] >= 0.28888):\n",
+    "        continue\n",
+    "    ct += 1\n",
+    "    p0[i] = 1 if ct in (2, 9, 14, 26, 27) else 0\n",
+    "    if ct == 27:\n",
+    "        break"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "id": "c1b08848",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-12T17:39:39.897870Z",
+     "iopub.status.busy": "2023-08-12T17:39:39.897406Z",
+     "iopub.status.idle": "2023-08-12T17:39:39.912708Z",
+     "shell.execute_reply": "2023-08-12T17:39:39.911432Z"
+    },
+    "papermill": {
+     "duration": 0.031025,
+     "end_time": "2023-08-12T17:39:39.915293",
+     "exception": false,
+     "start_time": "2023-08-12T17:39:39.884268",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Assemble the submission: Id taken from `test`, class_0 = post-processed p0, class_1 = 1 - p0,\n",
+    "# then write it to submission.csv without the index column.\n",
+    "submission = pd.DataFrame(test[\"Id\"], columns=[\"Id\"])\n",
+    "submission[\"class_0\"] = p0\n",
+    "submission[\"class_1\"] = 1 - p0\n",
+    "submission.to_csv('submission.csv', index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "id": "f89408cd",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-08-12T17:39:39.941134Z",
+     "iopub.status.busy": "2023-08-12T17:39:39.940337Z",
+     "iopub.status.idle": "2023-08-12T17:39:39.959393Z",
+     "shell.execute_reply": "2023-08-12T17:39:39.958227Z"
+    },
+    "papermill": {
+     "duration": 0.034648,
+     "end_time": "2023-08-12T17:39:39.961828",
+     "exception": false,
+     "start_time": "2023-08-12T17:39:39.927180",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Id</th>\n",
+       "      <th>class_0</th>\n",
+       "      <th>class_1</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>00eed32682bb</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>010ebe33f668</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>02fa521e1838</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>040e15f562a2</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>046e85c7cc7f</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "             Id  class_0  class_1\n",
+       "0  00eed32682bb      0.0      1.0\n",
+       "1  010ebe33f668      1.0      0.0\n",
+       "2  02fa521e1838      0.0      1.0\n",
+       "3  040e15f562a2      0.0      1.0\n",
+       "4  046e85c7cc7f      0.0      1.0"
+      ]
+     },
+     "execution_count": 27,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Read the written file back from disk and display it as a final check of what was saved.\n",
+    "submission_df = pd.read_csv('submission.csv')\n",
+    "submission_df"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  },
+  "papermill": {
+   "default_parameters": {},
+   "duration": 1595.376081,
+   "end_time": "2023-08-12T17:39:41.705163",
+   "environment_variables": {},
+   "exception": null,
+   "input_path": "__notebook__.ipynb",
+   "output_path": "__notebook__.ipynb",
+   "parameters": {},
+   "start_time": "2023-08-12T17:13:06.329082",
+   "version": "2.4.0"
+  },
+  "widgets": {
+   "application/vnd.jupyter.widget-state+json": {
+    "state": {
+     "1884fcbd64a9456dbd130120c3d4d8ba": {
+      "model_module": "@jupyter-widgets/controls",
+      "model_module_version": "1.5.0",
+      "model_name": "DescriptionStyleModel",
+      "state": {
+       "_model_module": "@jupyter-widgets/controls",
+       "_model_module_version": "1.5.0",
+       "_model_name": "DescriptionStyleModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/base",
+       "_view_module_version": "1.2.0",
+       "_view_name": "StyleView",
+       "description_width": ""
+      }
+     },
+     "23b624e1517c46c083fb19112712a8a4": {
+      "model_module": "@jupyter-widgets/controls",
+      "model_module_version": "1.5.0",
+      "model_name": "ProgressStyleModel",
+      "state": {
+       "_model_module": "@jupyter-widgets/controls",
+       "_model_module_version": "1.5.0",
+       "_model_name": "ProgressStyleModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/base",
+       "_view_module_version": "1.2.0",
+       "_view_name": "StyleView",
+       "bar_color": null,
+       "description_width": ""
+      }
+     },
+     "28a546a88db1498a9bb92b6ba2b0a4c5": {
+      "model_module": "@jupyter-widgets/base",
+      "model_module_version": "1.2.0",
+      "model_name": "LayoutModel",
+      "state": {
+       "_model_module": "@jupyter-widgets/base",
+       "_model_module_version": "1.2.0",
+       "_model_name": "LayoutModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/base",
+       "_view_module_version": "1.2.0",
+       "_view_name": "LayoutView",
+       "align_content": null,
+       "align_items": null,
+       "align_self": null,
+       "border": null,
+       "bottom": null,
+       "display": null,
+       "flex": null,
+       "flex_flow": null,
+       "grid_area": null,
+       "grid_auto_columns": null,
+       "grid_auto_flow": null,
+       "grid_auto_rows": null,
+       "grid_column": null,
+       "grid_gap": null,
+       "grid_row": null,
+       "grid_template_areas": null,
+       "grid_template_columns": null,
+       "grid_template_rows": null,
+       "height": null,
+       "justify_content": null,
+       "justify_items": null,
+       "left": null,
+       "margin": null,
+       "max_height": null,
+       "max_width": null,
+       "min_height": null,
+       "min_width": null,
+       "object_fit": null,
+       "object_position": null,
+       "order": null,
+       "overflow": null,
+       "overflow_x": null,
+       "overflow_y": null,
+       "padding": null,
+       "right": null,
+       "top": null,
+       "visibility": null,
+       "width": null
+      }
+     },
+     "39395a57020841dcacc7c10703882292": {
+      "model_module": "@jupyter-widgets/base",
+      "model_module_version": "1.2.0",
+      "model_name": "LayoutModel",
+      "state": {
+       "_model_module": "@jupyter-widgets/base",
+       "_model_module_version": "1.2.0",
+       "_model_name": "LayoutModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/base",
+       "_view_module_version": "1.2.0",
+       "_view_name": "LayoutView",
+       "align_content": null,
+       "align_items": null,
+       "align_self": null,
+       "border": null,
+       "bottom": null,
+       "display": null,
+       "flex": null,
+       "flex_flow": null,
+       "grid_area": null,
+       "grid_auto_columns": null,
+       "grid_auto_flow": null,
+       "grid_auto_rows": null,
+       "grid_column": null,
+       "grid_gap": null,
+       "grid_row": null,
+       "grid_template_areas": null,
+       "grid_template_columns": null,
+       "grid_template_rows": null,
+       "height": null,
+       "justify_content": null,
+       "justify_items": null,
+       "left": null,
+       "margin": null,
+       "max_height": null,
+       "max_width": null,
+       "min_height": null,
+       "min_width": null,
+       "object_fit": null,
+       "object_position": null,
+       "order": null,
+       "overflow": null,
+       "overflow_x": null,
+       "overflow_y": null,
+       "padding": null,
+       "right": null,
+       "top": null,
+       "visibility": null,
+       "width": null
+      }
+     },
+     "44f9d000be8742ff88b46f6d256e30a4": {
+      "model_module": "@jupyter-widgets/controls",
+      "model_module_version": "1.5.0",
+      "model_name": "FloatProgressModel",
+      "state": {
+       "_dom_classes": [],
+       "_model_module": "@jupyter-widgets/controls",
+       "_model_module_version": "1.5.0",
+       "_model_name": "FloatProgressModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/controls",
+       "_view_module_version": "1.5.0",
+       "_view_name": "ProgressView",
+       "bar_style": "success",
+       "description": "",
+       "description_tooltip": null,
+       "layout": "IPY_MODEL_c7d90f7285a742289dfd6f57af84dc49",
+       "max": 5.0,
+       "min": 0.0,
+       "orientation": "horizontal",
+       "style": "IPY_MODEL_23b624e1517c46c083fb19112712a8a4",
+       "value": 5.0
+      }
+     },
+     "646f09bebd9245c186074b3a517485f3": {
+      "model_module": "@jupyter-widgets/controls",
+      "model_module_version": "1.5.0",
+      "model_name": "HBoxModel",
+      "state": {
+       "_dom_classes": [],
+       "_model_module": "@jupyter-widgets/controls",
+       "_model_module_version": "1.5.0",
+       "_model_name": "HBoxModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/controls",
+       "_view_module_version": "1.5.0",
+       "_view_name": "HBoxView",
+       "box_style": "",
+       "children": [
+        "IPY_MODEL_b3b939acd8834411b49a03c6620ad84e",
+        "IPY_MODEL_44f9d000be8742ff88b46f6d256e30a4",
+        "IPY_MODEL_979b377395c8447aadcc5181219086f6"
+       ],
+       "layout": "IPY_MODEL_39395a57020841dcacc7c10703882292"
+      }
+     },
+     "979b377395c8447aadcc5181219086f6": {
+      "model_module": "@jupyter-widgets/controls",
+      "model_module_version": "1.5.0",
+      "model_name": "HTMLModel",
+      "state": {
+       "_dom_classes": [],
+       "_model_module": "@jupyter-widgets/controls",
+       "_model_module_version": "1.5.0",
+       "_model_name": "HTMLModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/controls",
+       "_view_module_version": "1.5.0",
+       "_view_name": "HTMLView",
+       "description": "",
+       "description_tooltip": null,
+       "layout": "IPY_MODEL_28a546a88db1498a9bb92b6ba2b0a4c5",
+       "placeholder": "",
+       "style": "IPY_MODEL_1884fcbd64a9456dbd130120c3d4d8ba",
+       "value": " 5/5 [21:54&lt;00:00, 262.32s/it]"
+      }
+     },
+     "9b088d9442c54c9690f889b21203e3e1": {
+      "model_module": "@jupyter-widgets/base",
+      "model_module_version": "1.2.0",
+      "model_name": "LayoutModel",
+      "state": {
+       "_model_module": "@jupyter-widgets/base",
+       "_model_module_version": "1.2.0",
+       "_model_name": "LayoutModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/base",
+       "_view_module_version": "1.2.0",
+       "_view_name": "LayoutView",
+       "align_content": null,
+       "align_items": null,
+       "align_self": null,
+       "border": null,
+       "bottom": null,
+       "display": null,
+       "flex": null,
+       "flex_flow": null,
+       "grid_area": null,
+       "grid_auto_columns": null,
+       "grid_auto_flow": null,
+       "grid_auto_rows": null,
+       "grid_column": null,
+       "grid_gap": null,
+       "grid_row": null,
+       "grid_template_areas": null,
+       "grid_template_columns": null,
+       "grid_template_rows": null,
+       "height": null,
+       "justify_content": null,
+       "justify_items": null,
+       "left": null,
+       "margin": null,
+       "max_height": null,
+       "max_width": null,
+       "min_height": null,
+       "min_width": null,
+       "object_fit": null,
+       "object_position": null,
+       "order": null,
+       "overflow": null,
+       "overflow_x": null,
+       "overflow_y": null,
+       "padding": null,
+       "right": null,
+       "top": null,
+       "visibility": null,
+       "width": null
+      }
+     },
+     "b3b939acd8834411b49a03c6620ad84e": {
+      "model_module": "@jupyter-widgets/controls",
+      "model_module_version": "1.5.0",
+      "model_name": "HTMLModel",
+      "state": {
+       "_dom_classes": [],
+       "_model_module": "@jupyter-widgets/controls",
+       "_model_module_version": "1.5.0",
+       "_model_name": "HTMLModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/controls",
+       "_view_module_version": "1.5.0",
+       "_view_name": "HTMLView",
+       "description": "",
+       "description_tooltip": null,
+       "layout": "IPY_MODEL_9b088d9442c54c9690f889b21203e3e1",
+       "placeholder": "",
+       "style": "IPY_MODEL_ca62def05e9a4b55af75433b8de84833",
+       "value": "100%"
+      }
+     },
+     "c7d90f7285a742289dfd6f57af84dc49": {
+      "model_module": "@jupyter-widgets/base",
+      "model_module_version": "1.2.0",
+      "model_name": "LayoutModel",
+      "state": {
+       "_model_module": "@jupyter-widgets/base",
+       "_model_module_version": "1.2.0",
+       "_model_name": "LayoutModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/base",
+       "_view_module_version": "1.2.0",
+       "_view_name": "LayoutView",
+       "align_content": null,
+       "align_items": null,
+       "align_self": null,
+       "border": null,
+       "bottom": null,
+       "display": null,
+       "flex": null,
+       "flex_flow": null,
+       "grid_area": null,
+       "grid_auto_columns": null,
+       "grid_auto_flow": null,
+       "grid_auto_rows": null,
+       "grid_column": null,
+       "grid_gap": null,
+       "grid_row": null,
+       "grid_template_areas": null,
+       "grid_template_columns": null,
+       "grid_template_rows": null,
+       "height": null,
+       "justify_content": null,
+       "justify_items": null,
+       "left": null,
+       "margin": null,
+       "max_height": null,
+       "max_width": null,
+       "min_height": null,
+       "min_width": null,
+       "object_fit": null,
+       "object_position": null,
+       "order": null,
+       "overflow": null,
+       "overflow_x": null,
+       "overflow_y": null,
+       "padding": null,
+       "right": null,
+       "top": null,
+       "visibility": null,
+       "width": null
+      }
+     },
+     "ca62def05e9a4b55af75433b8de84833": {
+      "model_module": "@jupyter-widgets/controls",
+      "model_module_version": "1.5.0",
+      "model_name": "DescriptionStyleModel",
+      "state": {
+       "_model_module": "@jupyter-widgets/controls",
+       "_model_module_version": "1.5.0",
+       "_model_name": "DescriptionStyleModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/base",
+       "_view_module_version": "1.2.0",
+       "_view_name": "StyleView",
+       "description_width": ""
+      }
+     }
+    },
+    "version_major": 2,
+    "version_minor": 0
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

TabPFN.py/Tabpfn_classifier_ver1.ipynb ADDED Viewed

	@@ -0,0 +1,1442 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "2e34fca0",
+   "metadata": {
+    "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
+    "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
+    "execution": {
+     "iopub.execute_input": "2023-07-19T15:49:20.883704Z",
+     "iopub.status.busy": "2023-07-19T15:49:20.883307Z",
+     "iopub.status.idle": "2023-07-19T15:49:20.899082Z",
+     "shell.execute_reply": "2023-07-19T15:49:20.897481Z"
+    },
+    "papermill": {
+     "duration": 0.026307,
+     "end_time": "2023-07-19T15:49:20.901426",
+     "exception": false,
+     "start_time": "2023-07-19T15:49:20.875119",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/kaggle/input/tabpfn-019-whl/tabpfn-0.1.9-py3-none-any.whl\n",
+      "/kaggle/input/tabpfn-019-whl/prior_diff_real_checkpoint_n_0_epoch_42.cpkt\n",
+      "/kaggle/input/tabpfn-019-whl/prior_diff_real_checkpoint_n_0_epoch_100.cpkt\n",
+      "/kaggle/input/icr-identify-age-related-conditions/sample_submission.csv\n",
+      "/kaggle/input/icr-identify-age-related-conditions/greeks.csv\n",
+      "/kaggle/input/icr-identify-age-related-conditions/train.csv\n",
+      "/kaggle/input/icr-identify-age-related-conditions/test.csv\n"
+     ]
+    }
+   ],
+   "source": [
+    "# This Python 3 environment comes with many helpful analytics libraries installed\n",
+    "# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n",
+    "# For example, here's several helpful packages to load\n",
+    "\n",
+    "import numpy as np # linear algebra\n",
+    "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
+    "\n",
+    "# Input data files are available in the read-only \"../input/\" directory\n",
+    "# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n",
+    "\n",
+    "import os\n",
+    "# Print the full path of every file under /kaggle/input; the sub-directory list yielded by os.walk is unused.\n",
+    "for dirname, _, filenames in os.walk('/kaggle/input'):\n",
+    "    for filename in filenames:\n",
+    "        print(os.path.join(dirname, filename))\n",
+    "\n",
+    "# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n",
+    "# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "c7279ab3",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-19T15:49:20.914521Z",
+     "iopub.status.busy": "2023-07-19T15:49:20.914137Z",
+     "iopub.status.idle": "2023-07-19T15:49:52.144398Z",
+     "shell.execute_reply": "2023-07-19T15:49:52.143220Z"
+    },
+    "papermill": {
+     "duration": 31.239507,
+     "end_time": "2023-07-19T15:49:52.146861",
+     "exception": false,
+     "start_time": "2023-07-19T15:49:20.907354",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Processing /kaggle/input/tabpfn-019-whl/tabpfn-0.1.9-py3-none-any.whl\r\n",
+      "Requirement already satisfied: numpy>=1.21.2 in /opt/conda/lib/python3.10/site-packages (from tabpfn==0.1.9) (1.23.5)\r\n",
+      "Requirement already satisfied: pyyaml>=5.4.1 in /opt/conda/lib/python3.10/site-packages (from tabpfn==0.1.9) (6.0)\r\n",
+      "Requirement already satisfied: requests>=2.23.0 in /opt/conda/lib/python3.10/site-packages (from tabpfn==0.1.9) (2.31.0)\r\n",
+      "Requirement already satisfied: scikit-learn>=0.24.2 in /opt/conda/lib/python3.10/site-packages (from tabpfn==0.1.9) (1.2.2)\r\n",
+      "Requirement already satisfied: torch>=1.9.0 in /opt/conda/lib/python3.10/site-packages (from tabpfn==0.1.9) (2.0.0+cpu)\r\n",
+      "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests>=2.23.0->tabpfn==0.1.9) (3.1.0)\r\n",
+      "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests>=2.23.0->tabpfn==0.1.9) (3.4)\r\n",
+      "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests>=2.23.0->tabpfn==0.1.9) (1.26.15)\r\n",
+      "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests>=2.23.0->tabpfn==0.1.9) (2023.5.7)\r\n",
+      "Requirement already satisfied: scipy>=1.3.2 in /opt/conda/lib/python3.10/site-packages (from scikit-learn>=0.24.2->tabpfn==0.1.9) (1.11.1)\r\n",
+      "Requirement already satisfied: joblib>=1.1.1 in /opt/conda/lib/python3.10/site-packages (from scikit-learn>=0.24.2->tabpfn==0.1.9) (1.2.0)\r\n",
+      "Requirement already satisfied: threadpoolctl>=2.0.0 in /opt/conda/lib/python3.10/site-packages (from scikit-learn>=0.24.2->tabpfn==0.1.9) (3.1.0)\r\n",
+      "Requirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from torch>=1.9.0->tabpfn==0.1.9) (3.12.2)\r\n",
+      "Requirement already satisfied: typing-extensions in /opt/conda/lib/python3.10/site-packages (from torch>=1.9.0->tabpfn==0.1.9) (4.6.3)\r\n",
+      "Requirement already satisfied: sympy in /opt/conda/lib/python3.10/site-packages (from torch>=1.9.0->tabpfn==0.1.9) (1.12)\r\n",
+      "Requirement already satisfied: networkx in /opt/conda/lib/python3.10/site-packages (from torch>=1.9.0->tabpfn==0.1.9) (3.1)\r\n",
+      "Requirement already satisfied: jinja2 in /opt/conda/lib/python3.10/site-packages (from torch>=1.9.0->tabpfn==0.1.9) (3.1.2)\r\n",
+      "Requirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.10/site-packages (from jinja2->torch>=1.9.0->tabpfn==0.1.9) (2.1.3)\r\n",
+      "Requirement already satisfied: mpmath>=0.19 in /opt/conda/lib/python3.10/site-packages (from sympy->torch>=1.9.0->tabpfn==0.1.9) (1.3.0)\r\n",
+      "Installing collected packages: tabpfn\r\n",
+      "Successfully installed tabpfn-0.1.9\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Offline install from the attached wheel. %pip (rather than !pip) installs into the environment\n",
+    "# of the running kernel instead of whichever pip happens to be first on the shell PATH.\n",
+    "%pip install /kaggle/input/tabpfn-019-whl/tabpfn-0.1.9-py3-none-any.whl"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "5dda34bb",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-19T15:49:52.165897Z",
+     "iopub.status.busy": "2023-07-19T15:49:52.165541Z",
+     "iopub.status.idle": "2023-07-19T15:49:53.511008Z",
+     "shell.execute_reply": "2023-07-19T15:49:53.509666Z"
+    },
+    "papermill": {
+     "duration": 1.356566,
+     "end_time": "2023-07-19T15:49:53.513451",
+     "exception": false,
+     "start_time": "2023-07-19T15:49:52.156885",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Copy the epoch-100 checkpoint into tabpfn/models_diff inside site-packages.\n",
+    "# -p keeps this cell re-runnable: plain mkdir reports an error once the directory already exists.\n",
+    "!mkdir -p /opt/conda/lib/python3.10/site-packages/tabpfn/models_diff\n",
+    "!cp /kaggle/input/tabpfn-019-whl/prior_diff_real_checkpoint_n_0_epoch_100.cpkt /opt/conda/lib/python3.10/site-packages/tabpfn/models_diff/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "048aa160",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-19T15:49:53.527449Z",
+     "iopub.status.busy": "2023-07-19T15:49:53.527029Z",
+     "iopub.status.idle": "2023-07-19T15:49:58.980538Z",
+     "shell.execute_reply": "2023-07-19T15:49:58.979590Z"
+    },
+    "papermill": {
+     "duration": 5.463371,
+     "end_time": "2023-07-19T15:49:58.982884",
+     "exception": false,
+     "start_time": "2023-07-19T15:49:53.519513",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/opt/conda/lib/python3.10/site-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.23.5\n",
+      "  warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n"
+     ]
+    }
+   ],
+   "source": [
+    "from sklearn.preprocessing import LabelEncoder,normalize\n",
+    "from sklearn.ensemble import GradientBoostingClassifier,RandomForestClassifier\n",
+    "from sklearn.metrics import accuracy_score\n",
+    "from sklearn.impute import SimpleImputer\n",
+    "import imblearn\n",
+    "from imblearn.over_sampling import RandomOverSampler\n",
+    "from imblearn.under_sampling import RandomUnderSampler\n",
+    "import xgboost\n",
+    "import inspect\n",
+    "from collections import defaultdict\n",
+    "from tabpfn import TabPFNClassifier\n",
+    "import warnings\n",
+    "warnings.filterwarnings('ignore')\n",
+    "from sklearn.model_selection import KFold as KF, GridSearchCV\n",
+    "from tqdm.notebook import tqdm\n",
+    "from datetime import datetime"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "c860d4a3",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-19T15:49:58.998778Z",
+     "iopub.status.busy": "2023-07-19T15:49:58.997666Z",
+     "iopub.status.idle": "2023-07-19T15:49:59.003664Z",
+     "shell.execute_reply": "2023-07-19T15:49:59.002318Z"
+    },
+    "papermill": {
+     "duration": 0.015543,
+     "end_time": "2023-07-19T15:49:59.006138",
+     "exception": false,
+     "start_time": "2023-07-19T15:49:58.990595",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Lift pandas' display limits so frames are rendered with every column and every row.\n",
+    "pd.set_option('display.max_columns', None)\n",
+    "pd.set_option('display.max_rows', None)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "f7c35d8a",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-19T15:49:59.021171Z",
+     "iopub.status.busy": "2023-07-19T15:49:59.020777Z",
+     "iopub.status.idle": "2023-07-19T15:49:59.074163Z",
+     "shell.execute_reply": "2023-07-19T15:49:59.073110Z"
+    },
+    "papermill": {
+     "duration": 0.064133,
+     "end_time": "2023-07-19T15:49:59.077033",
+     "exception": false,
+     "start_time": "2023-07-19T15:49:59.012900",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Load the four competition CSVs (train, test, greeks, sample submission) from the Kaggle input directory.\n",
+    "train_df = pd.read_csv(\"/kaggle/input/icr-identify-age-related-conditions/train.csv\")\n",
+    "test_df = pd.read_csv(\"/kaggle/input/icr-identify-age-related-conditions/test.csv\")\n",
+    "greeks_df = pd.read_csv(\"/kaggle/input/icr-identify-age-related-conditions/greeks.csv\")\n",
+    "sample_submission = pd.read_csv(\"/kaggle/input/icr-identify-age-related-conditions/sample_submission.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "e6f4a8d9",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-19T15:49:59.092298Z",
+     "iopub.status.busy": "2023-07-19T15:49:59.091276Z",
+     "iopub.status.idle": "2023-07-19T15:49:59.108088Z",
+     "shell.execute_reply": "2023-07-19T15:49:59.106443Z"
+    },
+    "papermill": {
+     "duration": 0.027512,
+     "end_time": "2023-07-19T15:49:59.110901",
+     "exception": false,
+     "start_time": "2023-07-19T15:49:59.083389",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Binary-encode EJ in both frames: 1 where the value equals the first EJ value in train, otherwise 0.\n",
+    "first_category = train_df['EJ'].unique()[0]\n",
+    "train_df['EJ'] = (train_df['EJ'] == first_category).astype('int')\n",
+    "test_df['EJ'] = (test_df['EJ'] == first_category).astype('int')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "6cc16dc5",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-19T15:49:59.126235Z",
+     "iopub.status.busy": "2023-07-19T15:49:59.124902Z",
+     "iopub.status.idle": "2023-07-19T15:49:59.131980Z",
+     "shell.execute_reply": "2023-07-19T15:49:59.131296Z"
+    },
+    "papermill": {
+     "duration": 0.016797,
+     "end_time": "2023-07-19T15:49:59.134213",
+     "exception": false,
+     "start_time": "2023-07-19T15:49:59.117416",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "def random_under_sampler(df):\n",
+    "    neg, pos = np.bincount(df['Class'])\n",
+    "    one_df = df.loc[df['Class'] == 1] \n",
+    "    zero_df = df.loc[df['Class'] == 0]\n",
+    "    zero_df = zero_df.sample(n=pos)\n",
+    "    undersampled_df = pd.concat([zero_df, one_df])\n",
+    "    return undersampled_df.sample(frac = 1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "72c5ebbc",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-19T15:49:59.147644Z",
+     "iopub.status.busy": "2023-07-19T15:49:59.147321Z",
+     "iopub.status.idle": "2023-07-19T15:49:59.161115Z",
+     "shell.execute_reply": "2023-07-19T15:49:59.160423Z"
+    },
+    "papermill": {
+     "duration": 0.023061,
+     "end_time": "2023-07-19T15:49:59.163502",
+     "exception": false,
+     "start_time": "2023-07-19T15:49:59.140441",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "train_good = random_under_sampler(train_df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "e005e9ad",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-19T15:49:59.178245Z",
+     "iopub.status.busy": "2023-07-19T15:49:59.177838Z",
+     "iopub.status.idle": "2023-07-19T15:49:59.185746Z",
+     "shell.execute_reply": "2023-07-19T15:49:59.184237Z"
+    },
+    "papermill": {
+     "duration": 0.018132,
+     "end_time": "2023-07-19T15:49:59.188463",
+     "exception": false,
+     "start_time": "2023-07-19T15:49:59.170331",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(216, 58)"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "train_good.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "c4cd1123",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-19T15:49:59.204657Z",
+     "iopub.status.busy": "2023-07-19T15:49:59.204301Z",
+     "iopub.status.idle": "2023-07-19T15:49:59.215752Z",
+     "shell.execute_reply": "2023-07-19T15:49:59.213946Z"
+    },
+    "papermill": {
+     "duration": 0.023495,
+     "end_time": "2023-07-19T15:49:59.218315",
+     "exception": false,
+     "start_time": "2023-07-19T15:49:59.194820",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Feature names are every train column except the row identifier and the label.\n",
+    "excluded_columns = ('Class', 'Id')\n",
+    "predictor_columns = [column for column in train_df.columns if column not in excluded_columns]\n",
+    "# Feature matrix and binary target taken from the training frame.\n",
+    "y = train_df['Class']\n",
+    "x = train_df[predictor_columns]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "bd89b899",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-19T15:49:59.232391Z",
+     "iopub.status.busy": "2023-07-19T15:49:59.232003Z",
+     "iopub.status.idle": "2023-07-19T15:49:59.236624Z",
+     "shell.execute_reply": "2023-07-19T15:49:59.235735Z"
+    },
+    "papermill": {
+     "duration": 0.013656,
+     "end_time": "2023-07-19T15:49:59.238300",
+     "exception": false,
+     "start_time": "2023-07-19T15:49:59.224644",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "cv_outer = KF(n_splits = 10, shuffle=True, random_state=42)\n",
+    "cv_inner = KF(n_splits = 5, shuffle=True, random_state=42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "e9063643",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-19T15:49:59.252426Z",
+     "iopub.status.busy": "2023-07-19T15:49:59.251721Z",
+     "iopub.status.idle": "2023-07-19T15:49:59.257645Z",
+     "shell.execute_reply": "2023-07-19T15:49:59.256965Z"
+    },
+    "papermill": {
+     "duration": 0.01509,
+     "end_time": "2023-07-19T15:49:59.259617",
+     "exception": false,
+     "start_time": "2023-07-19T15:49:59.244527",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "def balanced_log_loss(y_true, y_pred):\n",
+    "    N_0 = np.sum(1 - y_true)\n",
+    "    N_1 = np.sum(y_true)\n",
+    "    w_0 = 1 / N_0\n",
+    "    w_1 = 1 / N_1\n",
+    "    p_1 = np.clip(y_pred, 1e-15, 1 - 1e-15)\n",
+    "    p_0 = 1 - p_1\n",
+    "    log_loss_0 = -np.sum((1 - y_true) * np.log(p_0))\n",
+    "    log_loss_1 = -np.sum(y_true * np.log(p_1))\n",
+    "    balanced_log_loss = 2*(w_0 * log_loss_0 + w_1 * log_loss_1) / (w_0 + w_1)\n",
+    "    return balanced_log_loss/(N_0+N_1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "9b67e0f4",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-19T15:49:59.274215Z",
+     "iopub.status.busy": "2023-07-19T15:49:59.273585Z",
+     "iopub.status.idle": "2023-07-19T15:49:59.283466Z",
+     "shell.execute_reply": "2023-07-19T15:49:59.282130Z"
+    },
+    "papermill": {
+     "duration": 0.020008,
+     "end_time": "2023-07-19T15:49:59.285916",
+     "exception": false,
+     "start_time": "2023-07-19T15:49:59.265908",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "class Ensemble():\n",
+    "    def __init__(self):\n",
+    "        self.imputer = SimpleImputer(missing_values=np.nan, strategy='median')\n",
+    "        self.classifiers =[xgboost.XGBClassifier(),TabPFNClassifier(N_ensemble_configurations=64)]\n",
+    "    \n",
+    "    def fit(self,X,y):\n",
+    "        y = y.values\n",
+    "        unique_classes, y = np.unique(y, return_inverse=True)\n",
+    "        self.classes_ = unique_classes\n",
+    "        first_category = X.EJ.unique()[0]\n",
+    "        X.EJ = X.EJ.eq(first_category).astype('int')\n",
+    "        X = self.imputer.fit_transform(X)\n",
+    "#         X = normalize(X,axis=0)\n",
+    "        for classifier in self.classifiers:\n",
+    "            if classifier==self.classifiers[1]:\n",
+    "                classifier.fit(X,y,overwrite_warning =True)\n",
+    "            else :\n",
+    "                classifier.fit(X, y)\n",
+    "     \n",
+    "    def predict_proba(self, x):\n",
+    "        x = self.imputer.transform(x)\n",
+    "#         x = normalize(x,axis=0)\n",
+    "        probabilities = np.stack([classifier.predict_proba(x) for classifier in self.classifiers])\n",
+    "        averaged_probabilities = np.mean(probabilities, axis=0)\n",
+    "        class_0_est_instances = averaged_probabilities[:, 0].sum()\n",
+    "        others_est_instances = averaged_probabilities[:, 1:].sum()\n",
+    "        # Weighted probabilities based on class imbalance\n",
+    "        new_probabilities = averaged_probabilities * np.array([[1/(class_0_est_instances if i==0 else others_est_instances) for i in range(averaged_probabilities.shape[1])]])\n",
+    "        return new_probabilities / np.sum(new_probabilities, axis=1, keepdims=1) "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "d168bc26",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-19T15:49:59.300477Z",
+     "iopub.status.busy": "2023-07-19T15:49:59.299679Z",
+     "iopub.status.idle": "2023-07-19T15:49:59.308853Z",
+     "shell.execute_reply": "2023-07-19T15:49:59.307755Z"
+    },
+    "papermill": {
+     "duration": 0.018791,
+     "end_time": "2023-07-19T15:49:59.311141",
+     "exception": false,
+     "start_time": "2023-07-19T15:49:59.292350",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "def training(model, x,y,y_meta):\n",
+    "    \"\"\"Cross-validate ``model`` over ``cv_inner`` and return the best fold's fit.\n",
+    "\n",
+    "    For every fold the model is fitted on the ``y_meta`` labels of the training\n",
+    "    part and scored with ``balanced_log_loss`` against the ``y`` labels of the\n",
+    "    validation part.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    model : object with ``fit(X, y)`` and ``predict_proba(X)``\n",
+    "    x : pandas.DataFrame\n",
+    "        Feature rows, indexed positionally by the splitter.\n",
+    "    y : pandas.Series\n",
+    "        Binary target used for validation.\n",
+    "    y_meta : pandas.Series\n",
+    "        Labels the model is trained on.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    A snapshot of the model as fitted on the fold with the lowest validation\n",
+    "    loss. If no fold produced a comparable loss, ``model`` itself is returned.\n",
+    "    \"\"\"\n",
+    "    import copy  # local import: only needed to snapshot the fitted model\n",
+    "\n",
+    "    outer_results = list()\n",
+    "    best_loss = np.inf\n",
+    "    best_model = None\n",
+    "    splits = cv_inner.get_n_splits()\n",
+    "    for split, (train_idx, val_idx) in enumerate(tqdm(cv_inner.split(x), total = splits), start=1):\n",
+    "        x_train, x_val = x.iloc[train_idx],x.iloc[val_idx]\n",
+    "        # Train on the meta labels, validate against the binary target.\n",
+    "        y_train, y_val = y_meta.iloc[train_idx], y.iloc[val_idx]\n",
+    "\n",
+    "        model.fit(x_train, y_train)\n",
+    "        y_pred = model.predict_proba(x_val)\n",
+    "        # Column 0 is used as the probability of class 0 and everything else\n",
+    "        # counts as class 1.\n",
+    "        # NOTE(review): assumes the first meta label corresponds to Class 0 - confirm.\n",
+    "        p0 = np.array(y_pred[:, 0], dtype=float)\n",
+    "        # Snap confident predictions to the extremes.\n",
+    "        p0[p0 > 0.86] = 1\n",
+    "        p0[p0 < 0.14] = 0\n",
+    "        # Score the probabilities themselves; a log loss computed on hard 0/1\n",
+    "        # labels only reflects the clipping constant.\n",
+    "        loss = balanced_log_loss(y_val, 1 - p0)\n",
+    "\n",
+    "        if loss<best_loss:\n",
+    "            # model is refitted on the next fold, so keep an independent copy.\n",
+    "            best_model = copy.deepcopy(model)\n",
+    "            best_loss = loss\n",
+    "            print('best_model_saved')\n",
+    "        outer_results.append(loss)\n",
+    "        print('>val_loss=',loss, 'split =',split)\n",
+    "    print('LOSS:', np.mean(outer_results))\n",
+    "    return best_model if best_model is not None else model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "079c1769",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-19T15:49:59.325185Z",
+     "iopub.status.busy": "2023-07-19T15:49:59.324666Z",
+     "iopub.status.idle": "2023-07-19T15:49:59.342172Z",
+     "shell.execute_reply": "2023-07-19T15:49:59.341421Z"
+    },
+    "papermill": {
+     "duration": 0.027008,
+     "end_time": "2023-07-19T15:49:59.344419",
+     "exception": false,
+     "start_time": "2023-07-19T15:49:59.317411",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "times = greeks_df.Epsilon.copy()\n",
+    "times[greeks_df.Epsilon != 'Unknown'] = greeks_df.Epsilon[greeks_df.Epsilon != 'Unknown'].map(lambda x: datetime.strptime(x,'%m/%d/%Y').toordinal())\n",
+    "times[greeks_df.Epsilon == 'Unknown'] = np.nan"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "06e0bc9e",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-19T15:49:59.358648Z",
+     "iopub.status.busy": "2023-07-19T15:49:59.358118Z",
+     "iopub.status.idle": "2023-07-19T15:49:59.369274Z",
+     "shell.execute_reply": "2023-07-19T15:49:59.368592Z"
+    },
+    "papermill": {
+     "duration": 0.020368,
+     "end_time": "2023-07-19T15:49:59.371199",
+     "exception": false,
+     "start_time": "2023-07-19T15:49:59.350831",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Training features plus the Epsilon day ordinal as an extra column.\n",
+    "train_pred_and_time = pd.concat((train_df, times), axis=1)\n",
+    "# EJ in test_df was already encoded with the train reference category.\n",
+    "# Re-encoding it here against the first test row could invert the column,\n",
+    "# so it is used as is. copy() keeps later writes off the original frame.\n",
+    "test_predictors = test_df[predictor_columns].copy()\n",
+    "# Test rows have no Epsilon: stamp them one day after the latest training date.\n",
+    "# Kept as a DataFrame so the column names match the ones seen during fitting.\n",
+    "test_pred_and_time = test_predictors.assign(Epsilon=train_pred_and_time.Epsilon.max() + 1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "f2cfb985",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-19T15:49:59.385041Z",
+     "iopub.status.busy": "2023-07-19T15:49:59.384583Z",
+     "iopub.status.idle": "2023-07-19T15:49:59.412622Z",
+     "shell.execute_reply": "2023-07-19T15:49:59.411592Z"
+    },
+    "papermill": {
+     "duration": 0.037351,
+     "end_time": "2023-07-19T15:49:59.414704",
+     "exception": false,
+     "start_time": "2023-07-19T15:49:59.377353",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Original dataset shape\n",
+      "A    509\n",
+      "B     61\n",
+      "G     29\n",
+      "D     18\n",
+      "Name: Alpha, dtype: int64\n",
+      "Resample dataset shape\n",
+      "B    509\n",
+      "A    509\n",
+      "D    509\n",
+      "G    509\n",
+      "Name: Alpha, dtype: int64\n"
+     ]
+    }
+   ],
+   "source": [
+    "ros = RandomOverSampler(random_state=42)\n",
+    "\n",
+    "train_ros, y_ros = ros.fit_resample(train_pred_and_time, greeks_df.Alpha)\n",
+    "print('Original dataset shape')\n",
+    "print(greeks_df.Alpha.value_counts())\n",
+    "print('Resample dataset shape')\n",
+    "print( y_ros.value_counts())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "011c35ed",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-19T15:49:59.429884Z",
+     "iopub.status.busy": "2023-07-19T15:49:59.429313Z",
+     "iopub.status.idle": "2023-07-19T15:49:59.435592Z",
+     "shell.execute_reply": "2023-07-19T15:49:59.434689Z"
+    },
+    "papermill": {
+     "duration": 0.016651,
+     "end_time": "2023-07-19T15:49:59.437651",
+     "exception": false,
+     "start_time": "2023-07-19T15:49:59.421000",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "x_ros = train_ros.drop(['Class', 'Id'],axis=1)\n",
+    "y_ = train_ros.Class"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "d25f1095",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-19T15:49:59.451738Z",
+     "iopub.status.busy": "2023-07-19T15:49:59.451218Z",
+     "iopub.status.idle": "2023-07-19T15:49:59.867640Z",
+     "shell.execute_reply": "2023-07-19T15:49:59.866649Z"
+    },
+    "papermill": {
+     "duration": 0.425633,
+     "end_time": "2023-07-19T15:49:59.869598",
+     "exception": false,
+     "start_time": "2023-07-19T15:49:59.443965",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Loading model that can be used for inference only\n",
+      "Using a Transformer with 25.82 M parameters\n"
+     ]
+    }
+   ],
+   "source": [
+    "yt = Ensemble()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "edb43c1a",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-19T15:49:59.884325Z",
+     "iopub.status.busy": "2023-07-19T15:49:59.883762Z",
+     "iopub.status.idle": "2023-07-19T16:01:07.842831Z",
+     "shell.execute_reply": "2023-07-19T16:01:07.841617Z"
+    },
+    "papermill": {
+     "duration": 667.976046,
+     "end_time": "2023-07-19T16:01:07.852065",
+     "exception": false,
+     "start_time": "2023-07-19T15:49:59.876019",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "bd3cb9f78122483eb70d16ca6c7b8962",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/5 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "best_model_saved\n",
+      ">val_loss= 0.12283393999583053 split = 1\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/tmp/ipykernel_20/2226499128.py:11: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+      "Try using .loc[row_indexer,col_indexer] = value instead\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  X.EJ = X.EJ.eq(first_category).astype('int')\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "best_model_saved\n",
+      ">val_loss= 7.882664572210757e-16 split = 2\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/tmp/ipykernel_20/2226499128.py:11: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+      "Try using .loc[row_indexer,col_indexer] = value instead\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  X.EJ = X.EJ.eq(first_category).astype('int')\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      ">val_loss= 7.927542919637485e-16 split = 3\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/tmp/ipykernel_20/2226499128.py:11: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+      "Try using .loc[row_indexer,col_indexer] = value instead\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  X.EJ = X.EJ.eq(first_category).astype('int')\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "best_model_saved\n",
+      ">val_loss= 6.883759419809394e-16 split = 4\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/tmp/ipykernel_20/2226499128.py:11: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+      "Try using .loc[row_indexer,col_indexer] = value instead\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  X.EJ = X.EJ.eq(first_category).astype('int')\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      ">val_loss= 0.13386381920254847 split = 5\n",
+      "LOSS: 0.051339551839676256\n"
+     ]
+    }
+   ],
+   "source": [
+    "m = training(yt,x_ros,y_,y_ros)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "5a469b9e",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-19T16:01:07.867409Z",
+     "iopub.status.busy": "2023-07-19T16:01:07.867041Z",
+     "iopub.status.idle": "2023-07-19T16:01:07.875651Z",
+     "shell.execute_reply": "2023-07-19T16:01:07.874672Z"
+    },
+    "papermill": {
+     "duration": 0.019324,
+     "end_time": "2023-07-19T16:01:07.878246",
+     "exception": false,
+     "start_time": "2023-07-19T16:01:07.858922",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1    0.75\n",
+       "0    0.25\n",
+       "Name: Class, dtype: float64"
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "y_.value_counts()/y_.shape[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "id": "ae189a8b",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-19T16:01:07.894782Z",
+     "iopub.status.busy": "2023-07-19T16:01:07.894387Z",
+     "iopub.status.idle": "2023-07-19T16:02:58.756981Z",
+     "shell.execute_reply": "2023-07-19T16:02:58.756252Z"
+    },
+    "papermill": {
+     "duration": 110.872949,
+     "end_time": "2023-07-19T16:02:58.758958",
+     "exception": false,
+     "start_time": "2023-07-19T16:01:07.886009",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/opt/conda/lib/python3.10/site-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but SimpleImputer was fitted with feature names\n",
+      "  warnings.warn(\n"
+     ]
+    }
+   ],
+   "source": [
+    "y_pred = m.predict_proba(test_pred_and_time)\n",
+    "probabilities = np.concatenate((y_pred[:,:1], np.sum(y_pred[:,1:], 1, keepdims=True)), axis=1)\n",
+    "p0 = probabilities[:,:1]\n",
+    "p0[p0 > 0.70] = 1 \n",
+    "p0[p0 < 0.26] = 0"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "id": "351548b7",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-19T16:02:58.775878Z",
+     "iopub.status.busy": "2023-07-19T16:02:58.774253Z",
+     "iopub.status.idle": "2023-07-19T16:02:58.785182Z",
+     "shell.execute_reply": "2023-07-19T16:02:58.784481Z"
+    },
+    "papermill": {
+     "duration": 0.021177,
+     "end_time": "2023-07-19T16:02:58.787220",
+     "exception": false,
+     "start_time": "2023-07-19T16:02:58.766043",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "submission = pd.DataFrame(test_df[\"Id\"], columns=[\"Id\"])\n",
+    "submission[\"class_0\"] = p0\n",
+    "submission[\"class_1\"] = 1 - p0\n",
+    "submission.to_csv('submission.csv', index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "id": "37c7c730",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-19T16:02:58.803532Z",
+     "iopub.status.busy": "2023-07-19T16:02:58.802592Z",
+     "iopub.status.idle": "2023-07-19T16:02:58.821606Z",
+     "shell.execute_reply": "2023-07-19T16:02:58.820310Z"
+    },
+    "papermill": {
+     "duration": 0.029582,
+     "end_time": "2023-07-19T16:02:58.824105",
+     "exception": false,
+     "start_time": "2023-07-19T16:02:58.794523",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Id</th>\n",
+       "      <th>class_0</th>\n",
+       "      <th>class_1</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>00eed32682bb</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>010ebe33f668</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>02fa521e1838</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>040e15f562a2</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>046e85c7cc7f</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "             Id  class_0  class_1\n",
+       "0  00eed32682bb      0.5      0.5\n",
+       "1  010ebe33f668      0.5      0.5\n",
+       "2  02fa521e1838      0.5      0.5\n",
+       "3  040e15f562a2      0.5      0.5\n",
+       "4  046e85c7cc7f      0.5      0.5"
+      ]
+     },
+     "execution_count": 25,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "submission_df = pd.read_csv('submission.csv')\n",
+    "submission_df"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  },
+  "papermill": {
+   "default_parameters": {},
+   "duration": 828.301857,
+   "end_time": "2023-07-19T16:03:00.358486",
+   "environment_variables": {},
+   "exception": null,
+   "input_path": "__notebook__.ipynb",
+   "output_path": "__notebook__.ipynb",
+   "parameters": {},
+   "start_time": "2023-07-19T15:49:12.056629",
+   "version": "2.4.0"
+  },
+  "widgets": {
+   "application/vnd.jupyter.widget-state+json": {
+    "state": {
+     "34c26c71e174412d8c6bce15f6cf55ab": {
+      "model_module": "@jupyter-widgets/controls",
+      "model_module_version": "1.5.0",
+      "model_name": "ProgressStyleModel",
+      "state": {
+       "_model_module": "@jupyter-widgets/controls",
+       "_model_module_version": "1.5.0",
+       "_model_name": "ProgressStyleModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/base",
+       "_view_module_version": "1.2.0",
+       "_view_name": "StyleView",
+       "bar_color": null,
+       "description_width": ""
+      }
+     },
+     "4bdcfdf8ea294ffa851124a60dd797b5": {
+      "model_module": "@jupyter-widgets/base",
+      "model_module_version": "1.2.0",
+      "model_name": "LayoutModel",
+      "state": {
+       "_model_module": "@jupyter-widgets/base",
+       "_model_module_version": "1.2.0",
+       "_model_name": "LayoutModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/base",
+       "_view_module_version": "1.2.0",
+       "_view_name": "LayoutView",
+       "align_content": null,
+       "align_items": null,
+       "align_self": null,
+       "border": null,
+       "bottom": null,
+       "display": null,
+       "flex": null,
+       "flex_flow": null,
+       "grid_area": null,
+       "grid_auto_columns": null,
+       "grid_auto_flow": null,
+       "grid_auto_rows": null,
+       "grid_column": null,
+       "grid_gap": null,
+       "grid_row": null,
+       "grid_template_areas": null,
+       "grid_template_columns": null,
+       "grid_template_rows": null,
+       "height": null,
+       "justify_content": null,
+       "justify_items": null,
+       "left": null,
+       "margin": null,
+       "max_height": null,
+       "max_width": null,
+       "min_height": null,
+       "min_width": null,
+       "object_fit": null,
+       "object_position": null,
+       "order": null,
+       "overflow": null,
+       "overflow_x": null,
+       "overflow_y": null,
+       "padding": null,
+       "right": null,
+       "top": null,
+       "visibility": null,
+       "width": null
+      }
+     },
+     "86be6094b7ea4c018546c7a08ac21c32": {
+      "model_module": "@jupyter-widgets/controls",
+      "model_module_version": "1.5.0",
+      "model_name": "DescriptionStyleModel",
+      "state": {
+       "_model_module": "@jupyter-widgets/controls",
+       "_model_module_version": "1.5.0",
+       "_model_name": "DescriptionStyleModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/base",
+       "_view_module_version": "1.2.0",
+       "_view_name": "StyleView",
+       "description_width": ""
+      }
+     },
+     "92a276fc64f04f4a9220c8ecc22115b2": {
+      "model_module": "@jupyter-widgets/controls",
+      "model_module_version": "1.5.0",
+      "model_name": "FloatProgressModel",
+      "state": {
+       "_dom_classes": [],
+       "_model_module": "@jupyter-widgets/controls",
+       "_model_module_version": "1.5.0",
+       "_model_name": "FloatProgressModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/controls",
+       "_view_module_version": "1.5.0",
+       "_view_name": "ProgressView",
+       "bar_style": "success",
+       "description": "",
+       "description_tooltip": null,
+       "layout": "IPY_MODEL_9ca2f9f0ca2f4c368d1589f3daef97b6",
+       "max": 5.0,
+       "min": 0.0,
+       "orientation": "horizontal",
+       "style": "IPY_MODEL_34c26c71e174412d8c6bce15f6cf55ab",
+       "value": 5.0
+      }
+     },
+     "9ca2f9f0ca2f4c368d1589f3daef97b6": {
+      "model_module": "@jupyter-widgets/base",
+      "model_module_version": "1.2.0",
+      "model_name": "LayoutModel",
+      "state": {
+       "_model_module": "@jupyter-widgets/base",
+       "_model_module_version": "1.2.0",
+       "_model_name": "LayoutModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/base",
+       "_view_module_version": "1.2.0",
+       "_view_name": "LayoutView",
+       "align_content": null,
+       "align_items": null,
+       "align_self": null,
+       "border": null,
+       "bottom": null,
+       "display": null,
+       "flex": null,
+       "flex_flow": null,
+       "grid_area": null,
+       "grid_auto_columns": null,
+       "grid_auto_flow": null,
+       "grid_auto_rows": null,
+       "grid_column": null,
+       "grid_gap": null,
+       "grid_row": null,
+       "grid_template_areas": null,
+       "grid_template_columns": null,
+       "grid_template_rows": null,
+       "height": null,
+       "justify_content": null,
+       "justify_items": null,
+       "left": null,
+       "margin": null,
+       "max_height": null,
+       "max_width": null,
+       "min_height": null,
+       "min_width": null,
+       "object_fit": null,
+       "object_position": null,
+       "order": null,
+       "overflow": null,
+       "overflow_x": null,
+       "overflow_y": null,
+       "padding": null,
+       "right": null,
+       "top": null,
+       "visibility": null,
+       "width": null
+      }
+     },
+     "9d0450740ef245988383b5b43528cb3d": {
+      "model_module": "@jupyter-widgets/controls",
+      "model_module_version": "1.5.0",
+      "model_name": "DescriptionStyleModel",
+      "state": {
+       "_model_module": "@jupyter-widgets/controls",
+       "_model_module_version": "1.5.0",
+       "_model_name": "DescriptionStyleModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/base",
+       "_view_module_version": "1.2.0",
+       "_view_name": "StyleView",
+       "description_width": ""
+      }
+     },
+     "a6254b23a9df47ec88478882c76e34a1": {
+      "model_module": "@jupyter-widgets/controls",
+      "model_module_version": "1.5.0",
+      "model_name": "HTMLModel",
+      "state": {
+       "_dom_classes": [],
+       "_model_module": "@jupyter-widgets/controls",
+       "_model_module_version": "1.5.0",
+       "_model_name": "HTMLModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/controls",
+       "_view_module_version": "1.5.0",
+       "_view_name": "HTMLView",
+       "description": "",
+       "description_tooltip": null,
+       "layout": "IPY_MODEL_4bdcfdf8ea294ffa851124a60dd797b5",
+       "placeholder": "",
+       "style": "IPY_MODEL_86be6094b7ea4c018546c7a08ac21c32",
+       "value": "100%"
+      }
+     },
+     "b38b9d8d5d294e01bef5692bb9f9a086": {
+      "model_module": "@jupyter-widgets/controls",
+      "model_module_version": "1.5.0",
+      "model_name": "HTMLModel",
+      "state": {
+       "_dom_classes": [],
+       "_model_module": "@jupyter-widgets/controls",
+       "_model_module_version": "1.5.0",
+       "_model_name": "HTMLModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/controls",
+       "_view_module_version": "1.5.0",
+       "_view_name": "HTMLView",
+       "description": "",
+       "description_tooltip": null,
+       "layout": "IPY_MODEL_bf81d5f910ca475798b1bc946f8475b5",
+       "placeholder": "",
+       "style": "IPY_MODEL_9d0450740ef245988383b5b43528cb3d",
+       "value": " 5/5 [11:07&lt;00:00, 133.98s/it]"
+      }
+     },
+     "b9ccf904ba1f46aaae2cc1c094d45b0b": {
+      "model_module": "@jupyter-widgets/base",
+      "model_module_version": "1.2.0",
+      "model_name": "LayoutModel",
+      "state": {
+       "_model_module": "@jupyter-widgets/base",
+       "_model_module_version": "1.2.0",
+       "_model_name": "LayoutModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/base",
+       "_view_module_version": "1.2.0",
+       "_view_name": "LayoutView",
+       "align_content": null,
+       "align_items": null,
+       "align_self": null,
+       "border": null,
+       "bottom": null,
+       "display": null,
+       "flex": null,
+       "flex_flow": null,
+       "grid_area": null,
+       "grid_auto_columns": null,
+       "grid_auto_flow": null,
+       "grid_auto_rows": null,
+       "grid_column": null,
+       "grid_gap": null,
+       "grid_row": null,
+       "grid_template_areas": null,
+       "grid_template_columns": null,
+       "grid_template_rows": null,
+       "height": null,
+       "justify_content": null,
+       "justify_items": null,
+       "left": null,
+       "margin": null,
+       "max_height": null,
+       "max_width": null,
+       "min_height": null,
+       "min_width": null,
+       "object_fit": null,
+       "object_position": null,
+       "order": null,
+       "overflow": null,
+       "overflow_x": null,
+       "overflow_y": null,
+       "padding": null,
+       "right": null,
+       "top": null,
+       "visibility": null,
+       "width": null
+      }
+     },
+     "bd3cb9f78122483eb70d16ca6c7b8962": {
+      "model_module": "@jupyter-widgets/controls",
+      "model_module_version": "1.5.0",
+      "model_name": "HBoxModel",
+      "state": {
+       "_dom_classes": [],
+       "_model_module": "@jupyter-widgets/controls",
+       "_model_module_version": "1.5.0",
+       "_model_name": "HBoxModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/controls",
+       "_view_module_version": "1.5.0",
+       "_view_name": "HBoxView",
+       "box_style": "",
+       "children": [
+        "IPY_MODEL_a6254b23a9df47ec88478882c76e34a1",
+        "IPY_MODEL_92a276fc64f04f4a9220c8ecc22115b2",
+        "IPY_MODEL_b38b9d8d5d294e01bef5692bb9f9a086"
+       ],
+       "layout": "IPY_MODEL_b9ccf904ba1f46aaae2cc1c094d45b0b"
+      }
+     },
+     "bf81d5f910ca475798b1bc946f8475b5": {
+      "model_module": "@jupyter-widgets/base",
+      "model_module_version": "1.2.0",
+      "model_name": "LayoutModel",
+      "state": {
+       "_model_module": "@jupyter-widgets/base",
+       "_model_module_version": "1.2.0",
+       "_model_name": "LayoutModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/base",
+       "_view_module_version": "1.2.0",
+       "_view_name": "LayoutView",
+       "align_content": null,
+       "align_items": null,
+       "align_self": null,
+       "border": null,
+       "bottom": null,
+       "display": null,
+       "flex": null,
+       "flex_flow": null,
+       "grid_area": null,
+       "grid_auto_columns": null,
+       "grid_auto_flow": null,
+       "grid_auto_rows": null,
+       "grid_column": null,
+       "grid_gap": null,
+       "grid_row": null,
+       "grid_template_areas": null,
+       "grid_template_columns": null,
+       "grid_template_rows": null,
+       "height": null,
+       "justify_content": null,
+       "justify_items": null,
+       "left": null,
+       "margin": null,
+       "max_height": null,
+       "max_width": null,
+       "min_height": null,
+       "min_width": null,
+       "object_fit": null,
+       "object_position": null,
+       "order": null,
+       "overflow": null,
+       "overflow_x": null,
+       "overflow_y": null,
+       "padding": null,
+       "right": null,
+       "top": null,
+       "visibility": null,
+       "width": null
+      }
+     }
+    },
+    "version_major": 2,
+    "version_minor": 0
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

XgBoost.py/XgBoost_ver1.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

XgBoost.py/XgBoost_ver2.ipynb ADDED Viewed

	@@ -0,0 +1,1257 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "459944e1",
+   "metadata": {
+    "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
+    "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
+    "execution": {
+     "iopub.execute_input": "2023-07-16T14:00:17.517706Z",
+     "iopub.status.busy": "2023-07-16T14:00:17.517322Z",
+     "iopub.status.idle": "2023-07-16T14:00:17.532915Z",
+     "shell.execute_reply": "2023-07-16T14:00:17.531918Z"
+    },
+    "papermill": {
+     "duration": 0.028949,
+     "end_time": "2023-07-16T14:00:17.535784",
+     "exception": false,
+     "start_time": "2023-07-16T14:00:17.506835",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/kaggle/input/icr-identify-age-related-conditions/sample_submission.csv\n",
+      "/kaggle/input/icr-identify-age-related-conditions/greeks.csv\n",
+      "/kaggle/input/icr-identify-age-related-conditions/train.csv\n",
+      "/kaggle/input/icr-identify-age-related-conditions/test.csv\n"
+     ]
+    }
+   ],
+   "source": [
+    "# This Python 3 environment comes with many helpful analytics libraries installed\n",
+    "# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n",
+    "# For example, here's several helpful packages to load\n",
+    "\n",
+    "import numpy as np # linear algebra\n",
+    "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
+    "\n",
+    "# Input data files are available in the read-only \"../input/\" directory\n",
+    "# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n",
+    "\n",
+    "import os\n",
+    "for dirname, _, filenames in os.walk('/kaggle/input'):\n",
+    "    for filename in filenames:\n",
+    "        print(os.path.join(dirname, filename))\n",
+    "\n",
+    "# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n",
+    "# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "6f1fa358",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-16T14:00:17.554886Z",
+     "iopub.status.busy": "2023-07-16T14:00:17.553876Z",
+     "iopub.status.idle": "2023-07-16T14:00:21.000320Z",
+     "shell.execute_reply": "2023-07-16T14:00:20.998572Z"
+    },
+    "papermill": {
+     "duration": 3.459261,
+     "end_time": "2023-07-16T14:00:21.003513",
+     "exception": false,
+     "start_time": "2023-07-16T14:00:17.544252",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/opt/conda/lib/python3.10/site-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.23.5\n",
+      "  warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n"
+     ]
+    }
+   ],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from sklearn.preprocessing import LabelEncoder,normalize\n",
+    "from sklearn.ensemble import GradientBoostingClassifier,RandomForestClassifier\n",
+    "from sklearn.metrics import accuracy_score\n",
+    "from sklearn.impute import SimpleImputer\n",
+    "import imblearn\n",
+    "from imblearn.over_sampling import RandomOverSampler\n",
+    "from imblearn.under_sampling import RandomUnderSampler\n",
+    "import xgboost\n",
+    "import inspect\n",
+    "from collections import defaultdict\n",
+    "#from tabpfn import TabPFNClassifier\n",
+    "import lightgbm as lgb\n",
+    "import warnings\n",
+    "warnings.filterwarnings('ignore')\n",
+    "from sklearn.model_selection import KFold as KF, GridSearchCV\n",
+    "from tqdm.notebook import tqdm\n",
+    "from datetime import datetime"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "edcd8543",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-16T14:00:21.023332Z",
+     "iopub.status.busy": "2023-07-16T14:00:21.022856Z",
+     "iopub.status.idle": "2023-07-16T14:00:21.088898Z",
+     "shell.execute_reply": "2023-07-16T14:00:21.087472Z"
+    },
+    "papermill": {
+     "duration": 0.079354,
+     "end_time": "2023-07-16T14:00:21.092191",
+     "exception": false,
+     "start_time": "2023-07-16T14:00:21.012837",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "\n",
+    "train_df = pd.read_csv(\"/kaggle/input/icr-identify-age-related-conditions/train.csv\")\n",
+    "test_df = pd.read_csv(\"/kaggle/input/icr-identify-age-related-conditions/test.csv\")\n",
+    "greeks_df = pd.read_csv(\"/kaggle/input/icr-identify-age-related-conditions/greeks.csv\")\n",
+    "sample_submission = pd.read_csv(\"/kaggle/input/icr-identify-age-related-conditions/sample_submission.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "4da67da3",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-16T14:00:21.111296Z",
+     "iopub.status.busy": "2023-07-16T14:00:21.110878Z",
+     "iopub.status.idle": "2023-07-16T14:00:21.126219Z",
+     "shell.execute_reply": "2023-07-16T14:00:21.125017Z"
+    },
+    "papermill": {
+     "duration": 0.028324,
+     "end_time": "2023-07-16T14:00:21.129087",
+     "exception": false,
+     "start_time": "2023-07-16T14:00:21.100763",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "\n",
+    "# Binary-encode EJ in both frames: 1 where the value equals the first category seen in train, else 0.\n",
+    "first_category = train_df['EJ'].unique()[0]\n",
+    "for frame in (train_df, test_df):\n",
+    "    frame['EJ'] = (frame['EJ'] == first_category).astype('int')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "5d460a95",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-16T14:00:21.148826Z",
+     "iopub.status.busy": "2023-07-16T14:00:21.148442Z",
+     "iopub.status.idle": "2023-07-16T14:00:21.155072Z",
+     "shell.execute_reply": "2023-07-16T14:00:21.153937Z"
+    },
+    "papermill": {
+     "duration": 0.020215,
+     "end_time": "2023-07-16T14:00:21.157532",
+     "exception": false,
+     "start_time": "2023-07-16T14:00:21.137317",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "\n",
+    "def random_under_sampler(df, random_state=None):\n",
+    "    \"\"\"Balance a binary-labelled frame by down-sampling the larger class.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    df : pandas.DataFrame\n",
+    "        Frame with a 'Class' column holding 0/1 labels.\n",
+    "    random_state : int or None, default None\n",
+    "        Seed forwarded to ``DataFrame.sample`` for the down-sampling and the final\n",
+    "        shuffle. ``None`` keeps the previous unseeded behaviour.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    pandas.DataFrame\n",
+    "        Shuffled frame containing the same number of rows of each class.\n",
+    "    \"\"\"\n",
+    "    one_df = df.loc[df['Class'] == 1]\n",
+    "    zero_df = df.loc[df['Class'] == 0]\n",
+    "    # Trim whichever class is larger; the earlier version assumed class 0 was always the\n",
+    "    # majority and raised from sample() when it was not.\n",
+    "    n_keep = min(len(one_df), len(zero_df))\n",
+    "    if len(zero_df) > n_keep:\n",
+    "        zero_df = zero_df.sample(n=n_keep, random_state=random_state)\n",
+    "    if len(one_df) > n_keep:\n",
+    "        one_df = one_df.sample(n=n_keep, random_state=random_state)\n",
+    "    undersampled_df = pd.concat([zero_df, one_df])\n",
+    "    return undersampled_df.sample(frac=1, random_state=random_state)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "20ce7205",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-16T14:00:21.175607Z",
+     "iopub.status.busy": "2023-07-16T14:00:21.174813Z",
+     "iopub.status.idle": "2023-07-16T14:00:21.188987Z",
+     "shell.execute_reply": "2023-07-16T14:00:21.187603Z"
+    },
+    "papermill": {
+     "duration": 0.0262,
+     "end_time": "2023-07-16T14:00:21.191636",
+     "exception": false,
+     "start_time": "2023-07-16T14:00:21.165436",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "train_df_good = random_under_sampler(train_df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "f5cb2e31",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-16T14:00:21.210828Z",
+     "iopub.status.busy": "2023-07-16T14:00:21.209550Z",
+     "iopub.status.idle": "2023-07-16T14:00:21.219108Z",
+     "shell.execute_reply": "2023-07-16T14:00:21.217884Z"
+    },
+    "papermill": {
+     "duration": 0.021809,
+     "end_time": "2023-07-16T14:00:21.221645",
+     "exception": false,
+     "start_time": "2023-07-16T14:00:21.199836",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(216, 58)"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "\n",
+    "train_df_good.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "6b3c6991",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-16T14:00:21.241474Z",
+     "iopub.status.busy": "2023-07-16T14:00:21.241030Z",
+     "iopub.status.idle": "2023-07-16T14:00:21.251174Z",
+     "shell.execute_reply": "2023-07-16T14:00:21.249737Z"
+    },
+    "papermill": {
+     "duration": 0.02339,
+     "end_time": "2023-07-16T14:00:21.253719",
+     "exception": false,
+     "start_time": "2023-07-16T14:00:21.230329",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Every column except the target and the row identifier is a model input.\n",
+    "predictor_columns = [col for col in train_df.columns if col not in ('Class', 'Id')]\n",
+    "x = train_df.loc[:, predictor_columns]\n",
+    "y = train_df.loc[:, 'Class']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "7bb0f756",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-16T14:00:21.273356Z",
+     "iopub.status.busy": "2023-07-16T14:00:21.272869Z",
+     "iopub.status.idle": "2023-07-16T14:00:21.278998Z",
+     "shell.execute_reply": "2023-07-16T14:00:21.277841Z"
+    },
+    "papermill": {
+     "duration": 0.018676,
+     "end_time": "2023-07-16T14:00:21.281241",
+     "exception": false,
+     "start_time": "2023-07-16T14:00:21.262565",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "cv_outer = KF(n_splits = 10, shuffle=True, random_state=42)\n",
+    "cv_inner = KF(n_splits = 5, shuffle=True, random_state=42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "2cf318d7",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-16T14:00:21.300708Z",
+     "iopub.status.busy": "2023-07-16T14:00:21.299943Z",
+     "iopub.status.idle": "2023-07-16T14:00:21.308230Z",
+     "shell.execute_reply": "2023-07-16T14:00:21.307088Z"
+    },
+    "papermill": {
+     "duration": 0.020883,
+     "end_time": "2023-07-16T14:00:21.310917",
+     "exception": false,
+     "start_time": "2023-07-16T14:00:21.290034",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "def balanced_log_loss(y_true, y_pred):\n",
+    "    \"\"\"Balanced logarithmic loss for binary labels.\n",
+    "\n",
+    "    Each class contributes the mean negative log-likelihood of its own samples and the\n",
+    "    class means are then averaged, so both classes weigh the same however many samples\n",
+    "    they have. The previous formula matched this only when both classes had the same\n",
+    "    number of samples and produced nan when one class was absent.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    y_true : array-like of 0/1 labels\n",
+    "    y_pred : array-like\n",
+    "        Predicted probability of class 1; clipped to [1e-15, 1 - 1e-15] before the log.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    float\n",
+    "        Mean of the per-class mean log losses over the classes present in ``y_true``;\n",
+    "        nan when ``y_true`` is empty.\n",
+    "    \"\"\"\n",
+    "    # Work on plain 1-D float arrays so pandas index alignment cannot pair up the wrong rows.\n",
+    "    y_true = np.asarray(y_true, dtype=float).ravel()\n",
+    "    p_1 = np.clip(np.asarray(y_pred, dtype=float).ravel(), 1e-15, 1 - 1e-15)\n",
+    "    p_0 = 1 - p_1\n",
+    "\n",
+    "    N_0 = np.sum(1 - y_true)\n",
+    "    N_1 = np.sum(y_true)\n",
+    "    log_loss_0 = -np.sum((1 - y_true) * np.log(p_0))\n",
+    "    log_loss_1 = -np.sum(y_true * np.log(p_1))\n",
+    "\n",
+    "    # Skip a class with no samples instead of dividing by zero.\n",
+    "    per_class = [total / count for total, count in ((log_loss_0, N_0), (log_loss_1, N_1)) if count > 0]\n",
+    "    if not per_class:\n",
+    "        return float('nan')\n",
+    "    return float(np.mean(per_class))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "e21b0bd7",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-16T14:00:21.331052Z",
+     "iopub.status.busy": "2023-07-16T14:00:21.330612Z",
+     "iopub.status.idle": "2023-07-16T14:00:21.343946Z",
+     "shell.execute_reply": "2023-07-16T14:00:21.343016Z"
+    },
+    "papermill": {
+     "duration": 0.026312,
+     "end_time": "2023-07-16T14:00:21.346767",
+     "exception": false,
+     "start_time": "2023-07-16T14:00:21.320455",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "class Ensemble():\n",
+    "    \"\"\"Soft-voting ensemble: median imputation followed by averaged classifier probabilities.\n",
+    "\n",
+    "    Attributes\n",
+    "    ----------\n",
+    "    imputer : SimpleImputer fitted on the training matrix in ``fit``.\n",
+    "    classifiers : list of estimators whose ``predict_proba`` outputs are averaged.\n",
+    "    classes_ : sorted unique labels seen in ``fit``.\n",
+    "    ej_category_ : raw EJ label that is encoded as 1, or None when EJ arrived already numeric.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    def __init__(self):\n",
+    "        self.imputer = SimpleImputer(missing_values=np.nan, strategy='median')\n",
+    "        # TabPFNClassifier instances were tried here as additional members and are currently disabled.\n",
+    "        self.classifiers =[xgboost.XGBClassifier(n_estimators=100,max_depth=3,learning_rate=0.2,subsample=0.9,colsample_bytree=0.85),\n",
+    "                           xgboost.XGBClassifier()]\n",
+    "        self.ej_category_ = None\n",
+    "\n",
+    "    @staticmethod\n",
+    "    def _has_raw_ej(X):\n",
+    "        \"\"\"True when X is a frame whose EJ column still holds string labels.\"\"\"\n",
+    "        if not hasattr(X, 'columns') or 'EJ' not in X.columns:\n",
+    "            return False\n",
+    "        return bool(X['EJ'].map(lambda value: isinstance(value, str)).any())\n",
+    "\n",
+    "    def _encode_ej(self, X):\n",
+    "        \"\"\"Return X with raw EJ labels mapped to 0/1 using the category learned in fit.\n",
+    "\n",
+    "        Inputs that are not frames, or whose EJ is already numeric, are returned untouched.\n",
+    "        The caller's frame is never modified.\n",
+    "        \"\"\"\n",
+    "        if self.ej_category_ is None or not self._has_raw_ej(X):\n",
+    "            return X\n",
+    "        return X.assign(EJ=X['EJ'].eq(self.ej_category_).astype('int'))\n",
+    "\n",
+    "    def fit(self,X,y):\n",
+    "        \"\"\"Fit the imputer and every classifier on X with labels y.\n",
+    "\n",
+    "        Labels are mapped to 0..k-1 in sorted order and the originals kept in ``classes_``.\n",
+    "        EJ is encoded only when it still holds raw strings. Re-deriving the reference\n",
+    "        category from an already encoded 0/1 column (as this method used to do) inverted\n",
+    "        the feature whenever the first row held 0, and predict_proba never applied the\n",
+    "        same mapping.\n",
+    "\n",
+    "        Returns self.\n",
+    "        \"\"\"\n",
+    "        y = np.asarray(y)\n",
+    "        unique_classes, y = np.unique(y, return_inverse=True)\n",
+    "        self.classes_ = unique_classes\n",
+    "        self.ej_category_ = X['EJ'].unique()[0] if self._has_raw_ej(X) else None\n",
+    "        X = self.imputer.fit_transform(self._encode_ej(X))\n",
+    "\n",
+    "        for classifier in self.classifiers:\n",
+    "            classifier.fit(X, y)\n",
+    "        return self\n",
+    "\n",
+    "    def predict_proba(self, x):\n",
+    "        \"\"\"Return class probabilities averaged over the classifiers and re-balanced.\n",
+    "\n",
+    "        Column 0 is divided by its total estimated mass over all rows and every other\n",
+    "        column by the total mass of columns 1.., then each row is renormalised to sum to 1.\n",
+    "        \"\"\"\n",
+    "        x = self.imputer.transform(self._encode_ej(x))\n",
+    "        probabilities = np.stack([classifier.predict_proba(x) for classifier in self.classifiers])\n",
+    "        averaged_probabilities = np.mean(probabilities, axis=0)\n",
+    "        class_0_est_instances = averaged_probabilities[:, 0].sum()\n",
+    "        others_est_instances = averaged_probabilities[:, 1:].sum()\n",
+    "        # Weighted probabilities based on class imbalance\n",
+    "        class_weights = np.array([[1/(class_0_est_instances if i==0 else others_est_instances) for i in range(averaged_probabilities.shape[1])]])\n",
+    "        new_probabilities = averaged_probabilities * class_weights\n",
+    "        return new_probabilities / np.sum(new_probabilities, axis=1, keepdims=1) \n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "38907581",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-16T14:00:21.367308Z",
+     "iopub.status.busy": "2023-07-16T14:00:21.366896Z",
+     "iopub.status.idle": "2023-07-16T14:00:21.379770Z",
+     "shell.execute_reply": "2023-07-16T14:00:21.378370Z"
+    },
+    "papermill": {
+     "duration": 0.026467,
+     "end_time": "2023-07-16T14:00:21.382714",
+     "exception": false,
+     "start_time": "2023-07-16T14:00:21.356247",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "def training(model, x,y,y_meta):\n",
+    "    \"\"\"Cross-validate ``model`` over the ``cv_inner`` folds and return the best fitted snapshot.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    model : estimator exposing ``fit(X, y)`` and ``predict_proba(X)``.\n",
+    "    x : pandas.DataFrame of features (indexed positionally with ``iloc``).\n",
+    "    y : pandas.Series with the binary target used for validation.\n",
+    "    y_meta : pandas.Series with the target the model is actually fitted on.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    A deep copy of the model as fitted on the fold with the lowest validation loss.\n",
+    "    \"\"\"\n",
+    "    import copy  # local import: only needed to snapshot the fitted model\n",
+    "\n",
+    "    outer_results = list()\n",
+    "    best_loss = np.inf\n",
+    "    best_model = None\n",
+    "    splits = cv_inner.get_n_splits()\n",
+    "    for split, (train_idx, val_idx) in enumerate(tqdm(cv_inner.split(x), total = splits), start=1):\n",
+    "        x_train, x_val = x.iloc[train_idx],x.iloc[val_idx]\n",
+    "        # Fit on the meta target, validate against the binary target.\n",
+    "        y_train, y_val = y_meta.iloc[train_idx], y.iloc[val_idx]\n",
+    "\n",
+    "        model.fit(x_train, y_train)\n",
+    "        y_pred = model.predict_proba(x_val)\n",
+    "        # Collapse every non-zero class into a single positive probability.\n",
+    "        probabilities = np.concatenate((y_pred[:,:1], np.sum(y_pred[:,1:], 1, keepdims=True)), axis=1)\n",
+    "        p0 = probabilities[:, 0]\n",
+    "        # Hard label: positive when class 0 gets less than half the mass. The former\n",
+    "        # 0.86 / 0.14 snapping could not move a value across 0.5, so it is dropped.\n",
+    "        # NOTE(review): the loss below is computed on hard 0/1 labels, not on probabilities.\n",
+    "        y_p = (p0 < 0.5).astype(int)\n",
+    "        loss = balanced_log_loss(y_val,y_p)\n",
+    "\n",
+    "        if best_model is None or loss<best_loss or np.isnan(best_loss):\n",
+    "            # Snapshot the fitted state: ``model`` is refit on the next fold, so keeping a\n",
+    "            # bare reference would always hand back the last fold's fit.\n",
+    "            best_model = copy.deepcopy(model)\n",
+    "            best_loss = loss\n",
+    "            print('best_model_saved')\n",
+    "        outer_results.append(loss)\n",
+    "        print('>val_loss=%.5f, split = %.1f' % (loss,split))\n",
+    "    print('LOSS: %.5f' % (np.mean(outer_results)))\n",
+    "    return best_model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "5783c385",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-16T14:00:21.401915Z",
+     "iopub.status.busy": "2023-07-16T14:00:21.401467Z",
+     "iopub.status.idle": "2023-07-16T14:00:21.425585Z",
+     "shell.execute_reply": "2023-07-16T14:00:21.424538Z"
+    },
+    "papermill": {
+     "duration": 0.036977,
+     "end_time": "2023-07-16T14:00:21.428411",
+     "exception": false,
+     "start_time": "2023-07-16T14:00:21.391434",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Turn the Epsilon dates into ordinal day numbers; rows marked 'Unknown' become NaN.\n",
+    "has_date = greeks_df.Epsilon != 'Unknown'\n",
+    "times = greeks_df.Epsilon.copy()\n",
+    "times[has_date] = greeks_df.Epsilon[has_date].map(lambda x: datetime.strptime(x,'%m/%d/%Y').toordinal())\n",
+    "times[~has_date] = np.nan"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "b0264cd3",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-16T14:00:21.449211Z",
+     "iopub.status.busy": "2023-07-16T14:00:21.448215Z",
+     "iopub.status.idle": "2023-07-16T14:00:21.463314Z",
+     "shell.execute_reply": "2023-07-16T14:00:21.462240Z"
+    },
+    "papermill": {
+     "duration": 0.028401,
+     "end_time": "2023-07-16T14:00:21.466328",
+     "exception": false,
+     "start_time": "2023-07-16T14:00:21.437927",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "train_pred_and_time = pd.concat((train_df, times), axis=1)\n",
+    "# test_df.EJ was already mapped to 0/1 against the training category when the frames were\n",
+    "# encoded earlier in the notebook. Re-deriving the reference category from the test rows\n",
+    "# (as this cell used to do) flips the encoding whenever the first test row holds 0, so the\n",
+    "# column is taken as-is. The copy keeps later writes away from a slice of test_df.\n",
+    "test_predictors = test_df[predictor_columns].copy()\n",
+    "assert pd.api.types.is_numeric_dtype(test_predictors['EJ']), 'EJ must be encoded before this cell runs'\n",
+    "# Test rows carry no Epsilon value: give them the latest training ordinal plus one.\n",
+    "test_pred_and_time = np.concatenate((test_predictors, np.zeros((len(test_predictors), 1)) + train_pred_and_time.Epsilon.max() + 1), axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "9c0d5363",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-16T14:00:21.487186Z",
+     "iopub.status.busy": "2023-07-16T14:00:21.486384Z",
+     "iopub.status.idle": "2023-07-16T14:00:21.530196Z",
+     "shell.execute_reply": "2023-07-16T14:00:21.528975Z"
+    },
+    "papermill": {
+     "duration": 0.057986,
+     "end_time": "2023-07-16T14:00:21.533007",
+     "exception": false,
+     "start_time": "2023-07-16T14:00:21.475021",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Original dataset shape\n",
+      "A    509\n",
+      "B     61\n",
+      "G     29\n",
+      "D     18\n",
+      "Name: Alpha, dtype: int64\n",
+      "Resample dataset shape\n",
+      "B    509\n",
+      "A    509\n",
+      "D    509\n",
+      "G    509\n",
+      "Name: Alpha, dtype: int64\n"
+     ]
+    }
+   ],
+   "source": [
+    "ros = RandomOverSampler(random_state=42)\n",
+    "train_ros, y_ros = ros.fit_resample(train_pred_and_time, greeks_df.Alpha)\n",
+    "print('Original dataset shape')\n",
+    "print(greeks_df.Alpha.value_counts())\n",
+    "print('Resample dataset shape')\n",
+    "print( y_ros.value_counts())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "e375c3b6",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-16T14:00:21.552682Z",
+     "iopub.status.busy": "2023-07-16T14:00:21.551996Z",
+     "iopub.status.idle": "2023-07-16T14:00:21.560722Z",
+     "shell.execute_reply": "2023-07-16T14:00:21.559571Z"
+    },
+    "papermill": {
+     "duration": 0.021784,
+     "end_time": "2023-07-16T14:00:21.563678",
+     "exception": false,
+     "start_time": "2023-07-16T14:00:21.541894",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "\n",
+    "# Split the resampled frame into model inputs and the binary target.\n",
+    "x_ros = train_ros.drop(columns=['Class', 'Id'])\n",
+    "y_ = train_ros['Class']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "cdeca513",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-16T14:00:21.582486Z",
+     "iopub.status.busy": "2023-07-16T14:00:21.582001Z",
+     "iopub.status.idle": "2023-07-16T14:00:21.587344Z",
+     "shell.execute_reply": "2023-07-16T14:00:21.586235Z"
+    },
+    "papermill": {
+     "duration": 0.017561,
+     "end_time": "2023-07-16T14:00:21.589728",
+     "exception": false,
+     "start_time": "2023-07-16T14:00:21.572167",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "\n",
+    "yt = Ensemble()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "01637483",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-16T14:00:21.608991Z",
+     "iopub.status.busy": "2023-07-16T14:00:21.608586Z",
+     "iopub.status.idle": "2023-07-16T14:00:29.681349Z",
+     "shell.execute_reply": "2023-07-16T14:00:29.680406Z"
+    },
+    "papermill": {
+     "duration": 8.085504,
+     "end_time": "2023-07-16T14:00:29.683798",
+     "exception": false,
+     "start_time": "2023-07-16T14:00:21.598294",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "18b1b7454e0e48319b05b5a993f47cbd",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/5 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "best_model_saved\n",
+      ">val_loss=0.49134, split = 1.0\n",
+      "best_model_saved\n",
+      ">val_loss=0.24771, split = 2.0\n",
+      "best_model_saved\n",
+      ">val_loss=0.00000, split = 3.0\n",
+      ">val_loss=0.13220, split = 4.0\n",
+      ">val_loss=0.40159, split = 5.0\n",
+      "LOSS: 0.25457\n"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "m = training(yt,x_ros,y_,y_ros)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "52603914",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-16T14:00:29.705219Z",
+     "iopub.status.busy": "2023-07-16T14:00:29.704713Z",
+     "iopub.status.idle": "2023-07-16T14:00:29.715873Z",
+     "shell.execute_reply": "2023-07-16T14:00:29.714582Z"
+    },
+    "papermill": {
+     "duration": 0.025774,
+     "end_time": "2023-07-16T14:00:29.718965",
+     "exception": false,
+     "start_time": "2023-07-16T14:00:29.693191",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1    0.75\n",
+       "0    0.25\n",
+       "Name: Class, dtype: float64"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "y_.value_counts()/y_.shape[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "754e9c23",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-16T14:00:29.740372Z",
+     "iopub.status.busy": "2023-07-16T14:00:29.739879Z",
+     "iopub.status.idle": "2023-07-16T14:00:29.752997Z",
+     "shell.execute_reply": "2023-07-16T14:00:29.752086Z"
+    },
+    "papermill": {
+     "duration": 0.027082,
+     "end_time": "2023-07-16T14:00:29.755427",
+     "exception": false,
+     "start_time": "2023-07-16T14:00:29.728345",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "\n",
+    "y_pred = m.predict_proba(test_pred_and_time)\n",
+    "# Column 0 stays as is; every other class is summed into one positive-class column.\n",
+    "positive_mass = y_pred[:, 1:].sum(axis=1, keepdims=True)\n",
+    "probabilities = np.hstack([y_pred[:, :1], positive_mass])\n",
+    "p0 = probabilities[:, :1]\n",
+    "# Snap confident predictions to the extremes (in place, upper threshold first).\n",
+    "p0[p0 > 0.59] = 1\n",
+    "p0[p0 < 0.28] = 0"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "b6505a78",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-16T14:00:29.777401Z",
+     "iopub.status.busy": "2023-07-16T14:00:29.776987Z",
+     "iopub.status.idle": "2023-07-16T14:00:29.790086Z",
+     "shell.execute_reply": "2023-07-16T14:00:29.788839Z"
+    },
+    "papermill": {
+     "duration": 0.026657,
+     "end_time": "2023-07-16T14:00:29.793006",
+     "exception": false,
+     "start_time": "2023-07-16T14:00:29.766349",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "\n",
+    "# One row per test Id with the two class probabilities, written without the index.\n",
+    "class_0 = p0.ravel()\n",
+    "submission = pd.DataFrame({\"Id\": test_df[\"Id\"], \"class_0\": class_0, \"class_1\": 1 - class_0})\n",
+    "submission.to_csv('submission.csv', index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "a185d0b6",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-16T14:00:29.812749Z",
+     "iopub.status.busy": "2023-07-16T14:00:29.812370Z",
+     "iopub.status.idle": "2023-07-16T14:00:29.832503Z",
+     "shell.execute_reply": "2023-07-16T14:00:29.831080Z"
+    },
+    "papermill": {
+     "duration": 0.033126,
+     "end_time": "2023-07-16T14:00:29.835328",
+     "exception": false,
+     "start_time": "2023-07-16T14:00:29.802202",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Id</th>\n",
+       "      <th>class_0</th>\n",
+       "      <th>class_1</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>00eed32682bb</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>010ebe33f668</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>02fa521e1838</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>040e15f562a2</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>046e85c7cc7f</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "             Id  class_0  class_1\n",
+       "0  00eed32682bb      0.5      0.5\n",
+       "1  010ebe33f668      0.5      0.5\n",
+       "2  02fa521e1838      0.5      0.5\n",
+       "3  040e15f562a2      0.5      0.5\n",
+       "4  046e85c7cc7f      0.5      0.5"
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "submission_df = pd.read_csv('submission.csv')\n",
+    "submission_df"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  },
+  "papermill": {
+   "default_parameters": {},
+   "duration": 25.292095,
+   "end_time": "2023-07-16T14:00:30.969842",
+   "environment_variables": {},
+   "exception": null,
+   "input_path": "__notebook__.ipynb",
+   "output_path": "__notebook__.ipynb",
+   "parameters": {},
+   "start_time": "2023-07-16T14:00:05.677747",
+   "version": "2.4.0"
+  },
+  "widgets": {
+   "application/vnd.jupyter.widget-state+json": {
+    "state": {
+     "016218d199874567ad89e22e2df78d64": {
+      "model_module": "@jupyter-widgets/controls",
+      "model_module_version": "1.5.0",
+      "model_name": "DescriptionStyleModel",
+      "state": {
+       "_model_module": "@jupyter-widgets/controls",
+       "_model_module_version": "1.5.0",
+       "_model_name": "DescriptionStyleModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/base",
+       "_view_module_version": "1.2.0",
+       "_view_name": "StyleView",
+       "description_width": ""
+      }
+     },
+     "03bdf5df19ba49cd96e4b5209e92c6ab": {
+      "model_module": "@jupyter-widgets/controls",
+      "model_module_version": "1.5.0",
+      "model_name": "ProgressStyleModel",
+      "state": {
+       "_model_module": "@jupyter-widgets/controls",
+       "_model_module_version": "1.5.0",
+       "_model_name": "ProgressStyleModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/base",
+       "_view_module_version": "1.2.0",
+       "_view_name": "StyleView",
+       "bar_color": null,
+       "description_width": ""
+      }
+     },
+     "03e57d986f05468b899b8030b167a552": {
+      "model_module": "@jupyter-widgets/base",
+      "model_module_version": "1.2.0",
+      "model_name": "LayoutModel",
+      "state": {
+       "_model_module": "@jupyter-widgets/base",
+       "_model_module_version": "1.2.0",
+       "_model_name": "LayoutModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/base",
+       "_view_module_version": "1.2.0",
+       "_view_name": "LayoutView",
+       "align_content": null,
+       "align_items": null,
+       "align_self": null,
+       "border": null,
+       "bottom": null,
+       "display": null,
+       "flex": null,
+       "flex_flow": null,
+       "grid_area": null,
+       "grid_auto_columns": null,
+       "grid_auto_flow": null,
+       "grid_auto_rows": null,
+       "grid_column": null,
+       "grid_gap": null,
+       "grid_row": null,
+       "grid_template_areas": null,
+       "grid_template_columns": null,
+       "grid_template_rows": null,
+       "height": null,
+       "justify_content": null,
+       "justify_items": null,
+       "left": null,
+       "margin": null,
+       "max_height": null,
+       "max_width": null,
+       "min_height": null,
+       "min_width": null,
+       "object_fit": null,
+       "object_position": null,
+       "order": null,
+       "overflow": null,
+       "overflow_x": null,
+       "overflow_y": null,
+       "padding": null,
+       "right": null,
+       "top": null,
+       "visibility": null,
+       "width": null
+      }
+     },
+     "13642f0614b145fd9bb3043dac743a62": {
+      "model_module": "@jupyter-widgets/base",
+      "model_module_version": "1.2.0",
+      "model_name": "LayoutModel",
+      "state": {
+       "_model_module": "@jupyter-widgets/base",
+       "_model_module_version": "1.2.0",
+       "_model_name": "LayoutModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/base",
+       "_view_module_version": "1.2.0",
+       "_view_name": "LayoutView",
+       "align_content": null,
+       "align_items": null,
+       "align_self": null,
+       "border": null,
+       "bottom": null,
+       "display": null,
+       "flex": null,
+       "flex_flow": null,
+       "grid_area": null,
+       "grid_auto_columns": null,
+       "grid_auto_flow": null,
+       "grid_auto_rows": null,
+       "grid_column": null,
+       "grid_gap": null,
+       "grid_row": null,
+       "grid_template_areas": null,
+       "grid_template_columns": null,
+       "grid_template_rows": null,
+       "height": null,
+       "justify_content": null,
+       "justify_items": null,
+       "left": null,
+       "margin": null,
+       "max_height": null,
+       "max_width": null,
+       "min_height": null,
+       "min_width": null,
+       "object_fit": null,
+       "object_position": null,
+       "order": null,
+       "overflow": null,
+       "overflow_x": null,
+       "overflow_y": null,
+       "padding": null,
+       "right": null,
+       "top": null,
+       "visibility": null,
+       "width": null
+      }
+     },
+     "18b1b7454e0e48319b05b5a993f47cbd": {
+      "model_module": "@jupyter-widgets/controls",
+      "model_module_version": "1.5.0",
+      "model_name": "HBoxModel",
+      "state": {
+       "_dom_classes": [],
+       "_model_module": "@jupyter-widgets/controls",
+       "_model_module_version": "1.5.0",
+       "_model_name": "HBoxModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/controls",
+       "_view_module_version": "1.5.0",
+       "_view_name": "HBoxView",
+       "box_style": "",
+       "children": [
+        "IPY_MODEL_6c64067679314b1fbda62eaa0ea1c212",
+        "IPY_MODEL_dd388a4e894d44f5b3b44bf9a8804c11",
+        "IPY_MODEL_212e14b55d984de5958562a66c4e3a79"
+       ],
+       "layout": "IPY_MODEL_adba34cf672e4fde858e50d870323f36"
+      }
+     },
+     "212e14b55d984de5958562a66c4e3a79": {
+      "model_module": "@jupyter-widgets/controls",
+      "model_module_version": "1.5.0",
+      "model_name": "HTMLModel",
+      "state": {
+       "_dom_classes": [],
+       "_model_module": "@jupyter-widgets/controls",
+       "_model_module_version": "1.5.0",
+       "_model_name": "HTMLModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/controls",
+       "_view_module_version": "1.5.0",
+       "_view_name": "HTMLView",
+       "description": "",
+       "description_tooltip": null,
+       "layout": "IPY_MODEL_13642f0614b145fd9bb3043dac743a62",
+       "placeholder": "",
+       "style": "IPY_MODEL_016218d199874567ad89e22e2df78d64",
+       "value": " 5/5 [00:08&lt;00:00,  1.61s/it]"
+      }
+     },
+     "307cdcab85c640fa9230e743fc8a4849": {
+      "model_module": "@jupyter-widgets/base",
+      "model_module_version": "1.2.0",
+      "model_name": "LayoutModel",
+      "state": {
+       "_model_module": "@jupyter-widgets/base",
+       "_model_module_version": "1.2.0",
+       "_model_name": "LayoutModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/base",
+       "_view_module_version": "1.2.0",
+       "_view_name": "LayoutView",
+       "align_content": null,
+       "align_items": null,
+       "align_self": null,
+       "border": null,
+       "bottom": null,
+       "display": null,
+       "flex": null,
+       "flex_flow": null,
+       "grid_area": null,
+       "grid_auto_columns": null,
+       "grid_auto_flow": null,
+       "grid_auto_rows": null,
+       "grid_column": null,
+       "grid_gap": null,
+       "grid_row": null,
+       "grid_template_areas": null,
+       "grid_template_columns": null,
+       "grid_template_rows": null,
+       "height": null,
+       "justify_content": null,
+       "justify_items": null,
+       "left": null,
+       "margin": null,
+       "max_height": null,
+       "max_width": null,
+       "min_height": null,
+       "min_width": null,
+       "object_fit": null,
+       "object_position": null,
+       "order": null,
+       "overflow": null,
+       "overflow_x": null,
+       "overflow_y": null,
+       "padding": null,
+       "right": null,
+       "top": null,
+       "visibility": null,
+       "width": null
+      }
+     },
+     "65aed14b169c438495157fc4afebbb6e": {
+      "model_module": "@jupyter-widgets/controls",
+      "model_module_version": "1.5.0",
+      "model_name": "DescriptionStyleModel",
+      "state": {
+       "_model_module": "@jupyter-widgets/controls",
+       "_model_module_version": "1.5.0",
+       "_model_name": "DescriptionStyleModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/base",
+       "_view_module_version": "1.2.0",
+       "_view_name": "StyleView",
+       "description_width": ""
+      }
+     },
+     "6c64067679314b1fbda62eaa0ea1c212": {
+      "model_module": "@jupyter-widgets/controls",
+      "model_module_version": "1.5.0",
+      "model_name": "HTMLModel",
+      "state": {
+       "_dom_classes": [],
+       "_model_module": "@jupyter-widgets/controls",
+       "_model_module_version": "1.5.0",
+       "_model_name": "HTMLModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/controls",
+       "_view_module_version": "1.5.0",
+       "_view_name": "HTMLView",
+       "description": "",
+       "description_tooltip": null,
+       "layout": "IPY_MODEL_03e57d986f05468b899b8030b167a552",
+       "placeholder": "",
+       "style": "IPY_MODEL_65aed14b169c438495157fc4afebbb6e",
+       "value": "100%"
+      }
+     },
+     "adba34cf672e4fde858e50d870323f36": {
+      "model_module": "@jupyter-widgets/base",
+      "model_module_version": "1.2.0",
+      "model_name": "LayoutModel",
+      "state": {
+       "_model_module": "@jupyter-widgets/base",
+       "_model_module_version": "1.2.0",
+       "_model_name": "LayoutModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/base",
+       "_view_module_version": "1.2.0",
+       "_view_name": "LayoutView",
+       "align_content": null,
+       "align_items": null,
+       "align_self": null,
+       "border": null,
+       "bottom": null,
+       "display": null,
+       "flex": null,
+       "flex_flow": null,
+       "grid_area": null,
+       "grid_auto_columns": null,
+       "grid_auto_flow": null,
+       "grid_auto_rows": null,
+       "grid_column": null,
+       "grid_gap": null,
+       "grid_row": null,
+       "grid_template_areas": null,
+       "grid_template_columns": null,
+       "grid_template_rows": null,
+       "height": null,
+       "justify_content": null,
+       "justify_items": null,
+       "left": null,
+       "margin": null,
+       "max_height": null,
+       "max_width": null,
+       "min_height": null,
+       "min_width": null,
+       "object_fit": null,
+       "object_position": null,
+       "order": null,
+       "overflow": null,
+       "overflow_x": null,
+       "overflow_y": null,
+       "padding": null,
+       "right": null,
+       "top": null,
+       "visibility": null,
+       "width": null
+      }
+     },
+     "dd388a4e894d44f5b3b44bf9a8804c11": {
+      "model_module": "@jupyter-widgets/controls",
+      "model_module_version": "1.5.0",
+      "model_name": "FloatProgressModel",
+      "state": {
+       "_dom_classes": [],
+       "_model_module": "@jupyter-widgets/controls",
+       "_model_module_version": "1.5.0",
+       "_model_name": "FloatProgressModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/controls",
+       "_view_module_version": "1.5.0",
+       "_view_name": "ProgressView",
+       "bar_style": "success",
+       "description": "",
+       "description_tooltip": null,
+       "layout": "IPY_MODEL_307cdcab85c640fa9230e743fc8a4849",
+       "max": 5.0,
+       "min": 0.0,
+       "orientation": "horizontal",
+       "style": "IPY_MODEL_03bdf5df19ba49cd96e4b5209e92c6ab",
+       "value": 5.0
+      }
+     }
+    },
+    "version_major": 2,
+    "version_minor": 0
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

XgBoost.py/XgBoost_ver3.ipynb ADDED Viewed

	@@ -0,0 +1,1431 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "4ce707b2",
+   "metadata": {
+    "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
+    "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
+    "execution": {
+     "iopub.execute_input": "2023-07-21T15:52:46.226344Z",
+     "iopub.status.busy": "2023-07-21T15:52:46.225376Z",
+     "iopub.status.idle": "2023-07-21T15:52:46.245756Z",
+     "shell.execute_reply": "2023-07-21T15:52:46.244748Z"
+    },
+    "papermill": {
+     "duration": 0.034273,
+     "end_time": "2023-07-21T15:52:46.248766",
+     "exception": false,
+     "start_time": "2023-07-21T15:52:46.214493",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/kaggle/input/icr-identify-age-related-conditions/sample_submission.csv\n",
+      "/kaggle/input/icr-identify-age-related-conditions/greeks.csv\n",
+      "/kaggle/input/icr-identify-age-related-conditions/train.csv\n",
+      "/kaggle/input/icr-identify-age-related-conditions/test.csv\n",
+      "/kaggle/input/tabpfn-019-whl/tabpfn-0.1.9-py3-none-any.whl\n",
+      "/kaggle/input/tabpfn-019-whl/prior_diff_real_checkpoint_n_0_epoch_42.cpkt\n",
+      "/kaggle/input/tabpfn-019-whl/prior_diff_real_checkpoint_n_0_epoch_100.cpkt\n"
+     ]
+    }
+   ],
+   "source": [
+    "# This Python 3 environment comes with many helpful analytics libraries installed\n",
+    "# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n",
+    "# For example, here's several helpful packages to load\n",
+    "\n",
+    "import numpy as np # linear algebra\n",
+    "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
+    "\n",
+    "# Input data files are available in the read-only \"../input/\" directory\n",
+    "# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n",
+    "\n",
+    "import os\n",
+    "for dirname, _, filenames in os.walk('/kaggle/input'):\n",
+    "    for filename in filenames:\n",
+    "        print(os.path.join(dirname, filename))\n",
+    "\n",
+    "# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n",
+    "# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "9dd44f97",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-21T15:52:46.269079Z",
+     "iopub.status.busy": "2023-07-21T15:52:46.268284Z",
+     "iopub.status.idle": "2023-07-21T15:53:21.724129Z",
+     "shell.execute_reply": "2023-07-21T15:53:21.722662Z"
+    },
+    "papermill": {
+     "duration": 35.469555,
+     "end_time": "2023-07-21T15:53:21.727323",
+     "exception": false,
+     "start_time": "2023-07-21T15:52:46.257768",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Processing /kaggle/input/tabpfn-019-whl/tabpfn-0.1.9-py3-none-any.whl\r\n",
+      "Requirement already satisfied: numpy>=1.21.2 in /opt/conda/lib/python3.10/site-packages (from tabpfn==0.1.9) (1.23.5)\r\n",
+      "Requirement already satisfied: pyyaml>=5.4.1 in /opt/conda/lib/python3.10/site-packages (from tabpfn==0.1.9) (6.0)\r\n",
+      "Requirement already satisfied: requests>=2.23.0 in /opt/conda/lib/python3.10/site-packages (from tabpfn==0.1.9) (2.31.0)\r\n",
+      "Requirement already satisfied: scikit-learn>=0.24.2 in /opt/conda/lib/python3.10/site-packages (from tabpfn==0.1.9) (1.2.2)\r\n",
+      "Requirement already satisfied: torch>=1.9.0 in /opt/conda/lib/python3.10/site-packages (from tabpfn==0.1.9) (2.0.0+cpu)\r\n",
+      "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests>=2.23.0->tabpfn==0.1.9) (3.1.0)\r\n",
+      "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests>=2.23.0->tabpfn==0.1.9) (3.4)\r\n",
+      "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests>=2.23.0->tabpfn==0.1.9) (1.26.15)\r\n",
+      "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests>=2.23.0->tabpfn==0.1.9) (2023.5.7)\r\n",
+      "Requirement already satisfied: scipy>=1.3.2 in /opt/conda/lib/python3.10/site-packages (from scikit-learn>=0.24.2->tabpfn==0.1.9) (1.11.1)\r\n",
+      "Requirement already satisfied: joblib>=1.1.1 in /opt/conda/lib/python3.10/site-packages (from scikit-learn>=0.24.2->tabpfn==0.1.9) (1.2.0)\r\n",
+      "Requirement already satisfied: threadpoolctl>=2.0.0 in /opt/conda/lib/python3.10/site-packages (from scikit-learn>=0.24.2->tabpfn==0.1.9) (3.1.0)\r\n",
+      "Requirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from torch>=1.9.0->tabpfn==0.1.9) (3.12.2)\r\n",
+      "Requirement already satisfied: typing-extensions in /opt/conda/lib/python3.10/site-packages (from torch>=1.9.0->tabpfn==0.1.9) (4.6.3)\r\n",
+      "Requirement already satisfied: sympy in /opt/conda/lib/python3.10/site-packages (from torch>=1.9.0->tabpfn==0.1.9) (1.12)\r\n",
+      "Requirement already satisfied: networkx in /opt/conda/lib/python3.10/site-packages (from torch>=1.9.0->tabpfn==0.1.9) (3.1)\r\n",
+      "Requirement already satisfied: jinja2 in /opt/conda/lib/python3.10/site-packages (from torch>=1.9.0->tabpfn==0.1.9) (3.1.2)\r\n",
+      "Requirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.10/site-packages (from jinja2->torch>=1.9.0->tabpfn==0.1.9) (2.1.3)\r\n",
+      "Requirement already satisfied: mpmath>=0.19 in /opt/conda/lib/python3.10/site-packages (from sympy->torch>=1.9.0->tabpfn==0.1.9) (1.3.0)\r\n",
+      "Installing collected packages: tabpfn\r\n",
+      "Successfully installed tabpfn-0.1.9\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
+    "%pip install /kaggle/input/tabpfn-019-whl/tabpfn-0.1.9-py3-none-any.whl"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "c9b8d056",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-21T15:53:21.748477Z",
+     "iopub.status.busy": "2023-07-21T15:53:21.748021Z",
+     "iopub.status.idle": "2023-07-21T15:53:24.917894Z",
+     "shell.execute_reply": "2023-07-21T15:53:24.916503Z"
+    },
+    "papermill": {
+     "duration": 3.183937,
+     "end_time": "2023-07-21T15:53:24.920802",
+     "exception": false,
+     "start_time": "2023-07-21T15:53:21.736865",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# -p: do not fail when the directory already exists (e.g. when the cell is re-run).\n",
+    "!mkdir -p /opt/conda/lib/python3.10/site-packages/tabpfn/models_diff\n",
+    "!cp /kaggle/input/tabpfn-019-whl/prior_diff_real_checkpoint_n_0_epoch_100.cpkt /opt/conda/lib/python3.10/site-packages/tabpfn/models_diff/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "a30dcb9b",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-21T15:53:24.945098Z",
+     "iopub.status.busy": "2023-07-21T15:53:24.944662Z",
+     "iopub.status.idle": "2023-07-21T15:53:32.364411Z",
+     "shell.execute_reply": "2023-07-21T15:53:32.363046Z"
+    },
+    "papermill": {
+     "duration": 7.435592,
+     "end_time": "2023-07-21T15:53:32.367513",
+     "exception": false,
+     "start_time": "2023-07-21T15:53:24.931921",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/opt/conda/lib/python3.10/site-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.23.5\n",
+      "  warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n"
+     ]
+    }
+   ],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from sklearn.preprocessing import LabelEncoder,normalize\n",
+    "from sklearn.ensemble import GradientBoostingClassifier,RandomForestClassifier\n",
+    "from sklearn.metrics import accuracy_score\n",
+    "from sklearn.impute import SimpleImputer\n",
+    "import imblearn\n",
+    "from imblearn.over_sampling import RandomOverSampler\n",
+    "from imblearn.under_sampling import RandomUnderSampler\n",
+    "import xgboost\n",
+    "import inspect\n",
+    "from collections import defaultdict\n",
+    "from tabpfn import TabPFNClassifier\n",
+    "import lightgbm as lgb\n",
+    "import warnings\n",
+    "warnings.filterwarnings('ignore')\n",
+    "from sklearn.model_selection import KFold as KF, GridSearchCV\n",
+    "from tqdm.notebook import tqdm\n",
+    "from datetime import datetime"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "f0d7a8de",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-21T15:53:32.388083Z",
+     "iopub.status.busy": "2023-07-21T15:53:32.387679Z",
+     "iopub.status.idle": "2023-07-21T15:53:32.456602Z",
+     "shell.execute_reply": "2023-07-21T15:53:32.455535Z"
+    },
+    "papermill": {
+     "duration": 0.082612,
+     "end_time": "2023-07-21T15:53:32.459479",
+     "exception": false,
+     "start_time": "2023-07-21T15:53:32.376867",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "\n",
+    "train_df = pd.read_csv(\"/kaggle/input/icr-identify-age-related-conditions/train.csv\")\n",
+    "test_df = pd.read_csv(\"/kaggle/input/icr-identify-age-related-conditions/test.csv\")\n",
+    "greeks_df = pd.read_csv(\"/kaggle/input/icr-identify-age-related-conditions/greeks.csv\")\n",
+    "sample_submission = pd.read_csv(\"/kaggle/input/icr-identify-age-related-conditions/sample_submission.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "4f425c0c",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-21T15:53:32.480524Z",
+     "iopub.status.busy": "2023-07-21T15:53:32.479901Z",
+     "iopub.status.idle": "2023-07-21T15:53:32.496468Z",
+     "shell.execute_reply": "2023-07-21T15:53:32.495077Z"
+    },
+    "papermill": {
+     "duration": 0.029964,
+     "end_time": "2023-07-21T15:53:32.498999",
+     "exception": false,
+     "start_time": "2023-07-21T15:53:32.469035",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "\n",
+    "# Encode the categorical 'EJ' column as 0/1: 1 where the value equals the first\n",
+    "# category seen in train_df, 0 otherwise. The same category is applied to test_df.\n",
+    "# Guarded so that re-running the cell does not re-encode (and possibly flip)\n",
+    "# values that are already numeric.\n",
+    "if not pd.api.types.is_numeric_dtype(train_df['EJ']):\n",
+    "    f_c = train_df.EJ.unique()[0]\n",
+    "    train_df['EJ'] = train_df.EJ.eq(f_c).astype('int')\n",
+    "    test_df['EJ'] = test_df.EJ.eq(f_c).astype('int')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "a2d05095",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-21T15:53:32.519923Z",
+     "iopub.status.busy": "2023-07-21T15:53:32.519160Z",
+     "iopub.status.idle": "2023-07-21T15:53:32.526632Z",
+     "shell.execute_reply": "2023-07-21T15:53:32.525280Z"
+    },
+    "papermill": {
+     "duration": 0.020616,
+     "end_time": "2023-07-21T15:53:32.529035",
+     "exception": false,
+     "start_time": "2023-07-21T15:53:32.508419",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "\n",
+    "def random_under_sampler(df, random_state=None):\n",
+    "    \"\"\"Balance a 0/1-labelled frame by down-sampling the class-0 rows.\n",
+    "\n",
+    "    Keeps every row whose 'Class' equals 1 and draws, without replacement,\n",
+    "    the same number of rows whose 'Class' equals 0, then shuffles the result.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    df : pandas.DataFrame\n",
+    "        Frame with a 'Class' column holding 0/1 labels.\n",
+    "    random_state : optional\n",
+    "        Forwarded to DataFrame.sample for both the draw and the shuffle.\n",
+    "        Pass an int for reproducible output; None keeps the unseeded behaviour.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    pandas.DataFrame\n",
+    "        Shuffled frame with equally many class-0 and class-1 rows.\n",
+    "\n",
+    "    Raises\n",
+    "    ------\n",
+    "    ValueError\n",
+    "        Raised by DataFrame.sample when there are fewer class-0 rows than\n",
+    "        class-1 rows.\n",
+    "    \"\"\"\n",
+    "    one_df = df.loc[df['Class'] == 1]\n",
+    "    zero_df = df.loc[df['Class'] == 0]\n",
+    "    # Count positives from the selection itself; unpacking np.bincount into two\n",
+    "    # names only works when exactly the labels 0 and 1 both occur.\n",
+    "    zero_df = zero_df.sample(n=len(one_df), random_state=random_state)\n",
+    "    undersampled_df = pd.concat([zero_df, one_df])\n",
+    "    # frac=1 returns all rows in random order so the classes are interleaved.\n",
+    "    return undersampled_df.sample(frac=1, random_state=random_state)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "4dbfd096",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-21T15:53:32.550312Z",
+     "iopub.status.busy": "2023-07-21T15:53:32.549074Z",
+     "iopub.status.idle": "2023-07-21T15:53:32.563566Z",
+     "shell.execute_reply": "2023-07-21T15:53:32.562452Z"
+    },
+    "papermill": {
+     "duration": 0.028002,
+     "end_time": "2023-07-21T15:53:32.566324",
+     "exception": false,
+     "start_time": "2023-07-21T15:53:32.538322",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "train_df_good = random_under_sampler(train_df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "6bd48549",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-21T15:53:32.586802Z",
+     "iopub.status.busy": "2023-07-21T15:53:32.586406Z",
+     "iopub.status.idle": "2023-07-21T15:53:32.594042Z",
+     "shell.execute_reply": "2023-07-21T15:53:32.592939Z"
+    },
+    "papermill": {
+     "duration": 0.021077,
+     "end_time": "2023-07-21T15:53:32.596735",
+     "exception": false,
+     "start_time": "2023-07-21T15:53:32.575658",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(216, 58)"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "\n",
+    "train_df_good.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "fe4cad4c",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-21T15:53:32.617381Z",
+     "iopub.status.busy": "2023-07-21T15:53:32.616958Z",
+     "iopub.status.idle": "2023-07-21T15:53:32.626930Z",
+     "shell.execute_reply": "2023-07-21T15:53:32.625603Z"
+    },
+    "papermill": {
+     "duration": 0.023274,
+     "end_time": "2023-07-21T15:53:32.629359",
+     "exception": false,
+     "start_time": "2023-07-21T15:53:32.606085",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "predictor_columns = [n for n in train_df.columns if n != 'Class' and n != 'Id']\n",
+    "x= train_df[predictor_columns]\n",
+    "y = train_df['Class']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "dc4251ec",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-21T15:53:32.650279Z",
+     "iopub.status.busy": "2023-07-21T15:53:32.649843Z",
+     "iopub.status.idle": "2023-07-21T15:53:32.654686Z",
+     "shell.execute_reply": "2023-07-21T15:53:32.653811Z"
+    },
+    "papermill": {
+     "duration": 0.017862,
+     "end_time": "2023-07-21T15:53:32.656879",
+     "exception": false,
+     "start_time": "2023-07-21T15:53:32.639017",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "cv_outer = KF(n_splits = 10, shuffle=True, random_state=42)\n",
+    "cv_inner = KF(n_splits = 5, shuffle=True, random_state=42)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "55f481b0",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-21T15:53:32.677484Z",
+     "iopub.status.busy": "2023-07-21T15:53:32.677040Z",
+     "iopub.status.idle": "2023-07-21T15:53:32.684784Z",
+     "shell.execute_reply": "2023-07-21T15:53:32.683665Z"
+    },
+    "papermill": {
+     "duration": 0.020651,
+     "end_time": "2023-07-21T15:53:32.687050",
+     "exception": false,
+     "start_time": "2023-07-21T15:53:32.666399",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "def balanced_log_loss(y_true, y_pred):\n",
+    "    \"\"\"Balanced binary log loss: the mean of the per-class mean log losses.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    y_true : array-like of 0/1\n",
+    "        True labels.\n",
+    "    y_pred : array-like of float\n",
+    "        Predicted probability of class 1; clipped to [1e-15, 1 - 1e-15]\n",
+    "        before taking logarithms.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    float\n",
+    "        Average over the classes present in y_true of that class's mean\n",
+    "        negative log-likelihood. NaN when y_true is empty.\n",
+    "    \"\"\"\n",
+    "    y_true = np.asarray(y_true, dtype=float)\n",
+    "    p_1 = np.clip(np.asarray(y_pred, dtype=float), 1e-15, 1 - 1e-15)\n",
+    "    p_0 = 1 - p_1\n",
+    "\n",
+    "    N_0 = np.sum(1 - y_true)\n",
+    "    N_1 = np.sum(y_true)\n",
+    "    log_loss_0 = -np.sum((1 - y_true) * np.log(p_0))\n",
+    "    log_loss_1 = -np.sum(y_true * np.log(p_1))\n",
+    "\n",
+    "    # Each class contributes its own mean loss, so both classes weigh the same\n",
+    "    # however imbalanced the fold is. A class with no samples is left out\n",
+    "    # instead of producing a division by zero.\n",
+    "    per_class = [total / count\n",
+    "                 for total, count in ((log_loss_0, N_0), (log_loss_1, N_1))\n",
+    "                 if count > 0]\n",
+    "    if not per_class:\n",
+    "        return float('nan')\n",
+    "    return np.mean(per_class)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "8a4e9d35",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-21T15:53:32.707920Z",
+     "iopub.status.busy": "2023-07-21T15:53:32.707502Z",
+     "iopub.status.idle": "2023-07-21T15:53:32.721521Z",
+     "shell.execute_reply": "2023-07-21T15:53:32.720277Z"
+    },
+    "papermill": {
+     "duration": 0.027412,
+     "end_time": "2023-07-21T15:53:32.723937",
+     "exception": false,
+     "start_time": "2023-07-21T15:53:32.696525",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "class Ensemble():\n",
+    "    \"\"\"Soft-voting ensemble of two XGBoost and two TabPFN classifiers.\n",
+    "\n",
+    "    Inputs are median-imputed. When the input has an 'EJ' column it is encoded\n",
+    "    as 0/1 with a category learned in fit() and reused in predict_proba(), so\n",
+    "    training and prediction always see the same encoding.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    def __init__(self):\n",
+    "        self.imputer = SimpleImputer(missing_values=np.nan, strategy='median')\n",
+    "        self.classifiers =[xgboost.XGBClassifier(n_estimators=100,max_depth=3,learning_rate=0.2,subsample=0.9,colsample_bytree=0.85),\n",
+    "                           xgboost.XGBClassifier(),\n",
+    "                           TabPFNClassifier(N_ensemble_configurations=128),\n",
+    "                           TabPFNClassifier(N_ensemble_configurations=48)]\n",
+    "        # 'EJ' value that is encoded as 1; set by fit().\n",
+    "        self.ej_category_ = None\n",
+    "\n",
+    "    def _encode_ej(self, X):\n",
+    "        \"\"\"Return X with 'EJ' set to 1 where it equals the fitted category, else 0.\n",
+    "\n",
+    "        Operates on a copy so the caller's frame is never modified. Inputs\n",
+    "        without an 'EJ' column (e.g. plain arrays) are returned unchanged.\n",
+    "        \"\"\"\n",
+    "        category = getattr(self, 'ej_category_', None)\n",
+    "        if category is None or 'EJ' not in getattr(X, 'columns', ()):\n",
+    "            return X\n",
+    "        X = X.copy()\n",
+    "        X['EJ'] = X['EJ'].eq(category).astype('int')\n",
+    "        return X\n",
+    "\n",
+    "    def fit(self,X,y):\n",
+    "        \"\"\"Fit the imputer and every classifier on (X, y); returns self.\n",
+    "\n",
+    "        Labels are mapped to 0..k-1 in sorted order; the original label values\n",
+    "        are stored in `classes_`.\n",
+    "        \"\"\"\n",
+    "        y = np.asarray(y)\n",
+    "        unique_classes, y = np.unique(y, return_inverse=True)\n",
+    "        self.classes_ = unique_classes\n",
+    "        # Remember which 'EJ' value maps to 1 so predict_proba() can apply the\n",
+    "        # identical encoding instead of seeing a possibly flipped column.\n",
+    "        self.ej_category_ = X['EJ'].unique()[0] if 'EJ' in getattr(X, 'columns', ()) else None\n",
+    "        X = self.imputer.fit_transform(self._encode_ej(X))\n",
+    "\n",
+    "        for classifier in self.classifiers:\n",
+    "            if isinstance(classifier, TabPFNClassifier):\n",
+    "                classifier.fit(X,y,overwrite_warning =True)\n",
+    "            else :\n",
+    "                classifier.fit(X, y)\n",
+    "        return self\n",
+    "\n",
+    "    def predict_proba(self, x):\n",
+    "        \"\"\"Return class probabilities averaged over the classifiers and re-weighted.\n",
+    "\n",
+    "        The averaged probabilities are divided column-wise by the estimated\n",
+    "        number of instances in this batch (column 0 by the class-0 total, every\n",
+    "        other column by the combined total of the remaining classes) and each\n",
+    "        row is then renormalised to sum to 1. The output therefore depends on\n",
+    "        the whole batch passed in, not only on the individual row.\n",
+    "        \"\"\"\n",
+    "        x = self.imputer.transform(self._encode_ej(x))\n",
+    "        probabilities = np.stack([classifier.predict_proba(x) for classifier in self.classifiers])\n",
+    "        averaged_probabilities = np.mean(probabilities, axis=0)\n",
+    "        class_0_est_instances = averaged_probabilities[:, 0].sum()\n",
+    "        others_est_instances = averaged_probabilities[:, 1:].sum()\n",
+    "        # Weighted probabilities based on class imbalance\n",
+    "        weights = np.full(averaged_probabilities.shape[1], 1 / others_est_instances)\n",
+    "        weights[0] = 1 / class_0_est_instances\n",
+    "        new_probabilities = averaged_probabilities * weights\n",
+    "        return new_probabilities / np.sum(new_probabilities, axis=1, keepdims=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "2d8e9bac",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-21T15:53:32.745072Z",
+     "iopub.status.busy": "2023-07-21T15:53:32.744663Z",
+     "iopub.status.idle": "2023-07-21T15:53:32.757423Z",
+     "shell.execute_reply": "2023-07-21T15:53:32.756143Z"
+    },
+    "papermill": {
+     "duration": 0.026546,
+     "end_time": "2023-07-21T15:53:32.760075",
+     "exception": false,
+     "start_time": "2023-07-21T15:53:32.733529",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "def training(model, x,y,y_meta):\n",
+    "    \"\"\"Fit `model` on every `cv_inner` fold and return the best-scoring fit.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    model : object\n",
+    "        Exposes fit(X, y) and predict_proba(X).\n",
+    "    x : pandas.DataFrame\n",
+    "        Feature rows; folds are selected positionally with .iloc.\n",
+    "    y : pandas.Series\n",
+    "        Labels each validation fold is scored against.\n",
+    "    y_meta : pandas.Series\n",
+    "        Labels the model is fitted on for each training fold.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    object\n",
+    "        A snapshot (deep copy) of the model as fitted on the fold with the\n",
+    "        lowest validation loss. If no fold yields a loss below +inf (e.g. all\n",
+    "        NaN), the model as last fitted is returned.\n",
+    "\n",
+    "    Per-fold losses and their mean are printed.\n",
+    "    \"\"\"\n",
+    "    import copy  # local import keeps this cell runnable on its own\n",
+    "\n",
+    "    outer_results = list()\n",
+    "    best_loss = np.inf\n",
+    "    best_model = model  # fallback so the return value is always bound\n",
+    "    folds = tqdm(cv_inner.split(x), total = cv_inner.get_n_splits())\n",
+    "    for split, (train_idx, val_idx) in enumerate(folds, start=1):\n",
+    "        x_train, x_val = x.iloc[train_idx],x.iloc[val_idx]\n",
+    "        y_train, y_val = y_meta.iloc[train_idx], y.iloc[val_idx]\n",
+    "\n",
+    "        model.fit(x_train, y_train)\n",
+    "        y_pred = model.predict_proba(x_val)\n",
+    "        # Collapse to two columns: P(class 0) and P(any other class).\n",
+    "        probabilities = np.concatenate((y_pred[:,:1], np.sum(y_pred[:,1:], 1, keepdims=True)), axis=1)\n",
+    "        p0 = probabilities[:,:1]\n",
+    "        p0[p0 > 0.86] = 1\n",
+    "        p0[p0 < 0.14] = 0\n",
+    "        # Hard label: 0 when P(class 0) >= 0.5, otherwise 1.\n",
+    "        y_p = np.where(p0[:, 0] >= 0.5, 0, 1)\n",
+    "        # NOTE(review): the loss is computed on hard 0/1 labels rather than on\n",
+    "        # probabilities, so the 0.86/0.14 snapping above cannot change it.\n",
+    "        # Confirm this is intended.\n",
+    "        loss = balanced_log_loss(y_val,y_p)\n",
+    "\n",
+    "        if loss<best_loss:\n",
+    "            # `model` is refitted in place on the next fold, so keep a snapshot;\n",
+    "            # a plain reference would always end up being the last fit.\n",
+    "            best_model = copy.deepcopy(model)\n",
+    "            best_loss = loss\n",
+    "            print('best_model_saved')\n",
+    "        outer_results.append(loss)\n",
+    "        print('>val_loss=%.5f, split = %.1f' % (loss,split))\n",
+    "    print('LOSS: %.5f' % (np.mean(outer_results)))\n",
+    "    return best_model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "eac85a04",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-21T15:53:32.780758Z",
+     "iopub.status.busy": "2023-07-21T15:53:32.780354Z",
+     "iopub.status.idle": "2023-07-21T15:53:32.804335Z",
+     "shell.execute_reply": "2023-07-21T15:53:32.802984Z"
+    },
+    "papermill": {
+     "duration": 0.03723,
+     "end_time": "2023-07-21T15:53:32.806892",
+     "exception": false,
+     "start_time": "2023-07-21T15:53:32.769662",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "times = greeks_df.Epsilon.copy()\n",
+    "times[greeks_df.Epsilon != 'Unknown'] = greeks_df.Epsilon[greeks_df.Epsilon != 'Unknown'].map(lambda x: datetime.strptime(x,'%m/%d/%Y').toordinal())\n",
+    "times[greeks_df.Epsilon == 'Unknown'] = np.nan"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "13f7db8f",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-21T15:53:32.827820Z",
+     "iopub.status.busy": "2023-07-21T15:53:32.827408Z",
+     "iopub.status.idle": "2023-07-21T15:53:32.842949Z",
+     "shell.execute_reply": "2023-07-21T15:53:32.841736Z"
+    },
+    "papermill": {
+     "duration": 0.029024,
+     "end_time": "2023-07-21T15:53:32.845462",
+     "exception": false,
+     "start_time": "2023-07-21T15:53:32.816438",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Training frame with the Epsilon ordinal appended as an extra column.\n",
+    "train_pred_and_time = pd.concat((train_df, times), axis=1)\n",
+    "# Take an explicit copy so the EJ re-encoding below writes to an independent\n",
+    "# frame instead of a selection of test_df (avoids chained-assignment issues).\n",
+    "test_predictors = test_df[predictor_columns].copy()\n",
+    "# NOTE(review): the reference category is the first EJ value seen in the test\n",
+    "# data itself; confirm this matches the encoding applied to the training data.\n",
+    "f_c = test_predictors.EJ.unique()[0]\n",
+    "test_predictors['EJ'] = test_predictors.EJ.eq(f_c).astype('int')\n",
+    "# Test rows carry no date, so each gets a time value one past the latest training ordinal.\n",
+    "test_pred_and_time = np.concatenate((test_predictors, np.zeros((len(test_predictors), 1)) + train_pred_and_time.Epsilon.max() + 1), axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "7c59e8fa",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-21T15:53:32.866481Z",
+     "iopub.status.busy": "2023-07-21T15:53:32.866028Z",
+     "iopub.status.idle": "2023-07-21T15:53:32.909710Z",
+     "shell.execute_reply": "2023-07-21T15:53:32.908189Z"
+    },
+    "papermill": {
+     "duration": 0.057234,
+     "end_time": "2023-07-21T15:53:32.912386",
+     "exception": false,
+     "start_time": "2023-07-21T15:53:32.855152",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Original dataset shape\n",
+      "A    509\n",
+      "B     61\n",
+      "G     29\n",
+      "D     18\n",
+      "Name: Alpha, dtype: int64\n",
+      "Resample dataset shape\n",
+      "B    509\n",
+      "A    509\n",
+      "D    509\n",
+      "G    509\n",
+      "Name: Alpha, dtype: int64\n"
+     ]
+    }
+   ],
+   "source": [
+    "ros = RandomOverSampler(random_state=42)\n",
+    "train_ros, y_ros = ros.fit_resample(train_pred_and_time, greeks_df.Alpha)\n",
+    "print('Original dataset shape')\n",
+    "print(greeks_df.Alpha.value_counts())\n",
+    "print('Resample dataset shape')\n",
+    "print( y_ros.value_counts())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "24f93672",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-21T15:53:32.934654Z",
+     "iopub.status.busy": "2023-07-21T15:53:32.934203Z",
+     "iopub.status.idle": "2023-07-21T15:53:32.943106Z",
+     "shell.execute_reply": "2023-07-21T15:53:32.941927Z"
+    },
+    "papermill": {
+     "duration": 0.023743,
+     "end_time": "2023-07-21T15:53:32.945785",
+     "exception": false,
+     "start_time": "2023-07-21T15:53:32.922042",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "\n",
+    "x_ros = train_ros.drop(['Class', 'Id'],axis=1)\n",
+    "y_ = train_ros.Class"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "3bb22cb9",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-21T15:53:32.969825Z",
+     "iopub.status.busy": "2023-07-21T15:53:32.969410Z",
+     "iopub.status.idle": "2023-07-21T15:53:34.012308Z",
+     "shell.execute_reply": "2023-07-21T15:53:34.011458Z"
+    },
+    "papermill": {
+     "duration": 1.058482,
+     "end_time": "2023-07-21T15:53:34.015114",
+     "exception": false,
+     "start_time": "2023-07-21T15:53:32.956632",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Loading model that can be used for inference only\n",
+      "Using a Transformer with 25.82 M parameters\n",
+      "Loading model that can be used for inference only\n",
+      "Using a Transformer with 25.82 M parameters\n"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "yt = Ensemble()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "2f4be2ee",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-21T15:53:34.038390Z",
+     "iopub.status.busy": "2023-07-21T15:53:34.037910Z",
+     "iopub.status.idle": "2023-07-21T16:36:53.086642Z",
+     "shell.execute_reply": "2023-07-21T16:36:53.085438Z"
+    },
+    "papermill": {
+     "duration": 2599.074078,
+     "end_time": "2023-07-21T16:36:53.100516",
+     "exception": false,
+     "start_time": "2023-07-21T15:53:34.026438",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "1486711fdccc430bb8f19ffe0003cdf5",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/5 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "best_model_saved\n",
+      ">val_loss=0.12283, split = 1.0\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/tmp/ipykernel_20/772101332.py:14: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+      "Try using .loc[row_indexer,col_indexer] = value instead\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  X.EJ = X.EJ.eq(first_category).astype('int')\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "best_model_saved\n",
+      ">val_loss=0.00000, split = 2.0\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/tmp/ipykernel_20/772101332.py:14: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+      "Try using .loc[row_indexer,col_indexer] = value instead\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  X.EJ = X.EJ.eq(first_category).astype('int')\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      ">val_loss=0.00000, split = 3.0\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/tmp/ipykernel_20/772101332.py:14: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+      "Try using .loc[row_indexer,col_indexer] = value instead\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  X.EJ = X.EJ.eq(first_category).astype('int')\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      ">val_loss=0.13220, split = 4.0\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/tmp/ipykernel_20/772101332.py:14: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+      "Try using .loc[row_indexer,col_indexer] = value instead\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  X.EJ = X.EJ.eq(first_category).astype('int')\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      ">val_loss=0.13386, split = 5.0\n",
+      "LOSS: 0.07778\n"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "m = training(yt,x_ros,y_,y_ros)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "d96b306f",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-21T16:36:53.124238Z",
+     "iopub.status.busy": "2023-07-21T16:36:53.123776Z",
+     "iopub.status.idle": "2023-07-21T16:36:53.134302Z",
+     "shell.execute_reply": "2023-07-21T16:36:53.133149Z"
+    },
+    "papermill": {
+     "duration": 0.025325,
+     "end_time": "2023-07-21T16:36:53.136690",
+     "exception": false,
+     "start_time": "2023-07-21T16:36:53.111365",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1    0.75\n",
+       "0    0.25\n",
+       "Name: Class, dtype: float64"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "y_.value_counts()/y_.shape[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "c45bd72b",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-21T16:36:53.161504Z",
+     "iopub.status.busy": "2023-07-21T16:36:53.160656Z",
+     "iopub.status.idle": "2023-07-21T16:44:01.043865Z",
+     "shell.execute_reply": "2023-07-21T16:44:01.042239Z"
+    },
+    "papermill": {
+     "duration": 427.900911,
+     "end_time": "2023-07-21T16:44:01.048717",
+     "exception": false,
+     "start_time": "2023-07-21T16:36:53.147806",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/opt/conda/lib/python3.10/site-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but SimpleImputer was fitted with feature names\n",
+      "  warnings.warn(\n"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "y_pred = m.predict_proba(test_pred_and_time)\n",
+    "probabilities = np.concatenate((y_pred[:,:1], np.sum(y_pred[:,1:], 1, keepdims=True)), axis=1)\n",
+    "p0 = probabilities[:,:1]\n",
+    "p0[p0 > 0.59] = 1\n",
+    "p0[p0 < 0.28] = 0"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "id": "967ed14e",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-21T16:44:01.076361Z",
+     "iopub.status.busy": "2023-07-21T16:44:01.075908Z",
+     "iopub.status.idle": "2023-07-21T16:44:01.090619Z",
+     "shell.execute_reply": "2023-07-21T16:44:01.089411Z"
+    },
+    "papermill": {
+     "duration": 0.030936,
+     "end_time": "2023-07-21T16:44:01.093502",
+     "exception": false,
+     "start_time": "2023-07-21T16:44:01.062566",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "\n",
+    "submission = pd.DataFrame(test_df[\"Id\"], columns=[\"Id\"])\n",
+    "submission[\"class_0\"] = p0\n",
+    "submission[\"class_1\"] = 1 - p0\n",
+    "submission.to_csv('submission.csv', index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "id": "f9b4a7c5",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-07-21T16:44:01.118941Z",
+     "iopub.status.busy": "2023-07-21T16:44:01.118283Z",
+     "iopub.status.idle": "2023-07-21T16:44:01.136373Z",
+     "shell.execute_reply": "2023-07-21T16:44:01.135160Z"
+    },
+    "papermill": {
+     "duration": 0.03388,
+     "end_time": "2023-07-21T16:44:01.139087",
+     "exception": false,
+     "start_time": "2023-07-21T16:44:01.105207",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Id</th>\n",
+       "      <th>class_0</th>\n",
+       "      <th>class_1</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>00eed32682bb</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>010ebe33f668</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>02fa521e1838</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>040e15f562a2</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>046e85c7cc7f</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>0.5</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "             Id  class_0  class_1\n",
+       "0  00eed32682bb      0.5      0.5\n",
+       "1  010ebe33f668      0.5      0.5\n",
+       "2  02fa521e1838      0.5      0.5\n",
+       "3  040e15f562a2      0.5      0.5\n",
+       "4  046e85c7cc7f      0.5      0.5"
+      ]
+     },
+     "execution_count": 24,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "submission_df = pd.read_csv('submission.csv')\n",
+    "submission_df"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  },
+  "papermill": {
+   "default_parameters": {},
+   "duration": 3089.637665,
+   "end_time": "2023-07-21T16:44:03.814098",
+   "environment_variables": {},
+   "exception": null,
+   "input_path": "__notebook__.ipynb",
+   "output_path": "__notebook__.ipynb",
+   "parameters": {},
+   "start_time": "2023-07-21T15:52:34.176433",
+   "version": "2.4.0"
+  },
+  "widgets": {
+   "application/vnd.jupyter.widget-state+json": {
+    "state": {
+     "0eb183199e764348bf581e2ffce05ecf": {
+      "model_module": "@jupyter-widgets/base",
+      "model_module_version": "1.2.0",
+      "model_name": "LayoutModel",
+      "state": {
+       "_model_module": "@jupyter-widgets/base",
+       "_model_module_version": "1.2.0",
+       "_model_name": "LayoutModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/base",
+       "_view_module_version": "1.2.0",
+       "_view_name": "LayoutView",
+       "align_content": null,
+       "align_items": null,
+       "align_self": null,
+       "border": null,
+       "bottom": null,
+       "display": null,
+       "flex": null,
+       "flex_flow": null,
+       "grid_area": null,
+       "grid_auto_columns": null,
+       "grid_auto_flow": null,
+       "grid_auto_rows": null,
+       "grid_column": null,
+       "grid_gap": null,
+       "grid_row": null,
+       "grid_template_areas": null,
+       "grid_template_columns": null,
+       "grid_template_rows": null,
+       "height": null,
+       "justify_content": null,
+       "justify_items": null,
+       "left": null,
+       "margin": null,
+       "max_height": null,
+       "max_width": null,
+       "min_height": null,
+       "min_width": null,
+       "object_fit": null,
+       "object_position": null,
+       "order": null,
+       "overflow": null,
+       "overflow_x": null,
+       "overflow_y": null,
+       "padding": null,
+       "right": null,
+       "top": null,
+       "visibility": null,
+       "width": null
+      }
+     },
+     "1486711fdccc430bb8f19ffe0003cdf5": {
+      "model_module": "@jupyter-widgets/controls",
+      "model_module_version": "1.5.0",
+      "model_name": "HBoxModel",
+      "state": {
+       "_dom_classes": [],
+       "_model_module": "@jupyter-widgets/controls",
+       "_model_module_version": "1.5.0",
+       "_model_name": "HBoxModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/controls",
+       "_view_module_version": "1.5.0",
+       "_view_name": "HBoxView",
+       "box_style": "",
+       "children": [
+        "IPY_MODEL_f0a7e9ffa8394babae8a609317de1970",
+        "IPY_MODEL_d75596a40df548e8a3c16233b0c56d18",
+        "IPY_MODEL_ee353ab052bc474ead46fd0f9ed9e203"
+       ],
+       "layout": "IPY_MODEL_b2f3a43388b041edba2a3f9834a28a94"
+      }
+     },
+     "27e595e199224d0d93832f5dd41379e8": {
+      "model_module": "@jupyter-widgets/controls",
+      "model_module_version": "1.5.0",
+      "model_name": "DescriptionStyleModel",
+      "state": {
+       "_model_module": "@jupyter-widgets/controls",
+       "_model_module_version": "1.5.0",
+       "_model_name": "DescriptionStyleModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/base",
+       "_view_module_version": "1.2.0",
+       "_view_name": "StyleView",
+       "description_width": ""
+      }
+     },
+     "44e788a6798c486db6f6a9d4d910eb9e": {
+      "model_module": "@jupyter-widgets/base",
+      "model_module_version": "1.2.0",
+      "model_name": "LayoutModel",
+      "state": {
+       "_model_module": "@jupyter-widgets/base",
+       "_model_module_version": "1.2.0",
+       "_model_name": "LayoutModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/base",
+       "_view_module_version": "1.2.0",
+       "_view_name": "LayoutView",
+       "align_content": null,
+       "align_items": null,
+       "align_self": null,
+       "border": null,
+       "bottom": null,
+       "display": null,
+       "flex": null,
+       "flex_flow": null,
+       "grid_area": null,
+       "grid_auto_columns": null,
+       "grid_auto_flow": null,
+       "grid_auto_rows": null,
+       "grid_column": null,
+       "grid_gap": null,
+       "grid_row": null,
+       "grid_template_areas": null,
+       "grid_template_columns": null,
+       "grid_template_rows": null,
+       "height": null,
+       "justify_content": null,
+       "justify_items": null,
+       "left": null,
+       "margin": null,
+       "max_height": null,
+       "max_width": null,
+       "min_height": null,
+       "min_width": null,
+       "object_fit": null,
+       "object_position": null,
+       "order": null,
+       "overflow": null,
+       "overflow_x": null,
+       "overflow_y": null,
+       "padding": null,
+       "right": null,
+       "top": null,
+       "visibility": null,
+       "width": null
+      }
+     },
+     "ae831dbd067b467ab82639cf32c7c94d": {
+      "model_module": "@jupyter-widgets/controls",
+      "model_module_version": "1.5.0",
+      "model_name": "ProgressStyleModel",
+      "state": {
+       "_model_module": "@jupyter-widgets/controls",
+       "_model_module_version": "1.5.0",
+       "_model_name": "ProgressStyleModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/base",
+       "_view_module_version": "1.2.0",
+       "_view_name": "StyleView",
+       "bar_color": null,
+       "description_width": ""
+      }
+     },
+     "b2f3a43388b041edba2a3f9834a28a94": {
+      "model_module": "@jupyter-widgets/base",
+      "model_module_version": "1.2.0",
+      "model_name": "LayoutModel",
+      "state": {
+       "_model_module": "@jupyter-widgets/base",
+       "_model_module_version": "1.2.0",
+       "_model_name": "LayoutModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/base",
+       "_view_module_version": "1.2.0",
+       "_view_name": "LayoutView",
+       "align_content": null,
+       "align_items": null,
+       "align_self": null,
+       "border": null,
+       "bottom": null,
+       "display": null,
+       "flex": null,
+       "flex_flow": null,
+       "grid_area": null,
+       "grid_auto_columns": null,
+       "grid_auto_flow": null,
+       "grid_auto_rows": null,
+       "grid_column": null,
+       "grid_gap": null,
+       "grid_row": null,
+       "grid_template_areas": null,
+       "grid_template_columns": null,
+       "grid_template_rows": null,
+       "height": null,
+       "justify_content": null,
+       "justify_items": null,
+       "left": null,
+       "margin": null,
+       "max_height": null,
+       "max_width": null,
+       "min_height": null,
+       "min_width": null,
+       "object_fit": null,
+       "object_position": null,
+       "order": null,
+       "overflow": null,
+       "overflow_x": null,
+       "overflow_y": null,
+       "padding": null,
+       "right": null,
+       "top": null,
+       "visibility": null,
+       "width": null
+      }
+     },
+     "bfd66578853d48eeb16725dccf2b9065": {
+      "model_module": "@jupyter-widgets/controls",
+      "model_module_version": "1.5.0",
+      "model_name": "DescriptionStyleModel",
+      "state": {
+       "_model_module": "@jupyter-widgets/controls",
+       "_model_module_version": "1.5.0",
+       "_model_name": "DescriptionStyleModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/base",
+       "_view_module_version": "1.2.0",
+       "_view_name": "StyleView",
+       "description_width": ""
+      }
+     },
+     "d75596a40df548e8a3c16233b0c56d18": {
+      "model_module": "@jupyter-widgets/controls",
+      "model_module_version": "1.5.0",
+      "model_name": "FloatProgressModel",
+      "state": {
+       "_dom_classes": [],
+       "_model_module": "@jupyter-widgets/controls",
+       "_model_module_version": "1.5.0",
+       "_model_name": "FloatProgressModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/controls",
+       "_view_module_version": "1.5.0",
+       "_view_name": "ProgressView",
+       "bar_style": "success",
+       "description": "",
+       "description_tooltip": null,
+       "layout": "IPY_MODEL_0eb183199e764348bf581e2ffce05ecf",
+       "max": 5.0,
+       "min": 0.0,
+       "orientation": "horizontal",
+       "style": "IPY_MODEL_ae831dbd067b467ab82639cf32c7c94d",
+       "value": 5.0
+      }
+     },
+     "ee353ab052bc474ead46fd0f9ed9e203": {
+      "model_module": "@jupyter-widgets/controls",
+      "model_module_version": "1.5.0",
+      "model_name": "HTMLModel",
+      "state": {
+       "_dom_classes": [],
+       "_model_module": "@jupyter-widgets/controls",
+       "_model_module_version": "1.5.0",
+       "_model_name": "HTMLModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/controls",
+       "_view_module_version": "1.5.0",
+       "_view_name": "HTMLView",
+       "description": "",
+       "description_tooltip": null,
+       "layout": "IPY_MODEL_ee7ef555c90e4457b64ff561bf94a63c",
+       "placeholder": "",
+       "style": "IPY_MODEL_bfd66578853d48eeb16725dccf2b9065",
+       "value": " 5/5 [43:19&lt;00:00, 520.03s/it]"
+      }
+     },
+     "ee7ef555c90e4457b64ff561bf94a63c": {
+      "model_module": "@jupyter-widgets/base",
+      "model_module_version": "1.2.0",
+      "model_name": "LayoutModel",
+      "state": {
+       "_model_module": "@jupyter-widgets/base",
+       "_model_module_version": "1.2.0",
+       "_model_name": "LayoutModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/base",
+       "_view_module_version": "1.2.0",
+       "_view_name": "LayoutView",
+       "align_content": null,
+       "align_items": null,
+       "align_self": null,
+       "border": null,
+       "bottom": null,
+       "display": null,
+       "flex": null,
+       "flex_flow": null,
+       "grid_area": null,
+       "grid_auto_columns": null,
+       "grid_auto_flow": null,
+       "grid_auto_rows": null,
+       "grid_column": null,
+       "grid_gap": null,
+       "grid_row": null,
+       "grid_template_areas": null,
+       "grid_template_columns": null,
+       "grid_template_rows": null,
+       "height": null,
+       "justify_content": null,
+       "justify_items": null,
+       "left": null,
+       "margin": null,
+       "max_height": null,
+       "max_width": null,
+       "min_height": null,
+       "min_width": null,
+       "object_fit": null,
+       "object_position": null,
+       "order": null,
+       "overflow": null,
+       "overflow_x": null,
+       "overflow_y": null,
+       "padding": null,
+       "right": null,
+       "top": null,
+       "visibility": null,
+       "width": null
+      }
+     },
+     "f0a7e9ffa8394babae8a609317de1970": {
+      "model_module": "@jupyter-widgets/controls",
+      "model_module_version": "1.5.0",
+      "model_name": "HTMLModel",
+      "state": {
+       "_dom_classes": [],
+       "_model_module": "@jupyter-widgets/controls",
+       "_model_module_version": "1.5.0",
+       "_model_name": "HTMLModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/controls",
+       "_view_module_version": "1.5.0",
+       "_view_name": "HTMLView",
+       "description": "",
+       "description_tooltip": null,
+       "layout": "IPY_MODEL_44e788a6798c486db6f6a9d4d910eb9e",
+       "placeholder": "",
+       "style": "IPY_MODEL_27e595e199224d0d93832f5dd41379e8",
+       "value": "100%"
+      }
+     }
+    },
+    "version_major": 2,
+    "version_minor": 0
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}