{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import torch\n", "import torch.nn as nn\n", "import torch.optim as optim\n", "from sklearn.model_selection import train_test_split\n", "import os\n", "\n", "DATA_DIR = os.path.join(\"..\", \"data\")" ] },
{ "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/var/folders/v8/0hd98b512cn3ms2rz146k7jw0000gn/T/ipykernel_41770/685274063.py:1: DtypeWarning: Columns (481,482,483) have mixed types. Specify dtype option on import or set low_memory=False.\n", " detailed_games_df = pd.read_csv(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "<class 'pandas.core.frame.DataFrame'>\n", "RangeIndex: 377608 entries, 0 to 377607\n", "Columns: 487 entries, Unnamed: 0 to ChalkSeed\n", "dtypes: float64(347), int64(133), object(7)\n", "memory usage: 1.4+ GB\n" ] } ], "source": [ "detailed_games_df = pd.read_csv(\n", "    os.path.join(DATA_DIR, \"AllSuperDetailedGames.csv\"),\n", ")\n", "\n", "detailed_games_df.info()" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Split Men's & Women's Data" ] },
{ "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "mens_games_df = detailed_games_df[detailed_games_df[\"League\"] == \"M\"]\n", "wmns_games_df = detailed_games_df[detailed_games_df[\"League\"] == \"W\"]" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Define Features, Targets, and Register Data on the Device" ] },
{ "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# define the features and target for our models\n", "feature_cols = [\n", "    \"ScoreDiff mean reg\",\n", "    \"FGMDiff mean reg\",\n", "    \"FGM3Diff mean reg\",\n", "    \"TODiff mean reg\",\n", "\n", "    \"OppScore mean reg\",\n", "    \"OppFGM mean reg\",\n", "    \"OppFGM3 mean reg\",\n", "    \"OppTO mean reg\",\n", "]\n", "\n", "target_cols = [\"Win\"]" ] },
{ "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# split into training and testing datasets\n", "MX_train, MX_test, My_train, My_test = train_test_split(\n", "    mens_games_df[feature_cols],\n", "    mens_games_df[target_cols],\n", "    test_size=0.2,\n", "    random_state=1,\n", ")\n", "\n", "# same split for the women's data\n", "WX_train, WX_test, Wy_train, Wy_test = train_test_split(\n", "    wmns_games_df[feature_cols],\n", "    wmns_games_df[target_cols],\n", "    test_size=0.2,\n", "    random_state=1,\n", ")" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# pick the best available device: CUDA GPU, Apple Silicon (MPS), or CPU\n", "def get_device() -> str:\n", "    if torch.cuda.is_available():\n", "        return \"cuda\"\n", "    if torch.backends.mps.is_available():\n", "        return \"mps\"\n", "    return \"cpu\"\n", "\n", "DEVICE = get_device()\n", "print(DEVICE)" ] },
{ "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# convert the data to tensor objects and register them on the device\n", "MX_train_T = torch.tensor(\n", "    MX_train.astype(float).values,\n", "    dtype=torch.float32,\n", ").to(DEVICE)\n", "\n", "MX_test_T = torch.tensor(\n", "    MX_test.astype(float).values,\n", "    dtype=torch.float32,\n", ").to(DEVICE)\n", "\n", "My_train_T = torch.tensor(\n", "    My_train.astype(float).values,\n", "    dtype=torch.float32,\n", ").to(DEVICE)\n", "\n", "My_test_T = torch.tensor(\n", "    My_test.astype(float).values,\n", "    dtype=torch.float32,\n", ").to(DEVICE)\n", "\n", "# same conversion for the women's data\n", "WX_train_T = torch.tensor(\n", "    WX_train.astype(float).values,\n", "    dtype=torch.float32,\n", ").to(DEVICE)\n", "\n", "WX_test_T = torch.tensor(\n", "    WX_test.astype(float).values,\n", "    dtype=torch.float32,\n", ").to(DEVICE)\n", "\n", "Wy_train_T = torch.tensor(\n", "    Wy_train.astype(float).values,\n", "    dtype=torch.float32,\n", ").to(DEVICE)\n", "\n", "Wy_test_T = torch.tensor(\n", "    Wy_test.astype(float).values,\n", "    dtype=torch.float32,\n", ").to(DEVICE)" ] }
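, { "cell_type": "markdown", "metadata": {}, "source": [ "The eight conversions above all follow the same astype / tensor / to(DEVICE) pattern, so they could be wrapped in a small helper. The cell below is only a sketch of that optional refactor; `df_to_tensor` is a hypothetical name, and the explicit conversions above are what the rest of the notebook uses." ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# optional refactor sketch: wrap the repeated astype/tensor/to(DEVICE) steps\n", "# df_to_tensor is a hypothetical helper and is not used elsewhere in this notebook\n", "def df_to_tensor(df: pd.DataFrame) -> torch.Tensor:\n", "    return torch.tensor(\n", "        df.astype(float).values,\n", "        dtype=torch.float32,\n", "    ).to(DEVICE)\n", "\n", "# example usage, equivalent to the explicit conversions above:\n", "# MX_train_T = df_to_tensor(MX_train)" ] }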
, { "cell_type": "markdown", "metadata": {}, "source": [ "# Generic Neural Network Framework\n\nI am using the same neural network architecture for both the men's and women's data." ] },
{ "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "num_features = len(feature_cols)\n", "\n", "class NiglNN(nn.Module):\n", "    def __init__(self):\n", "        super().__init__()\n", "        self.activation_func = nn.Sigmoid()\n", "        self.layer1 = nn.Linear(num_features, 64)\n", "        self.layer2 = nn.Linear(64, 32)\n", "        self.layer3 = nn.Linear(32, 16)\n", "        self.layer4 = nn.Linear(16, 8)\n", "        self.layer5 = nn.Linear(8, 4)\n", "        self.layer6 = nn.Linear(4, 1)\n", "\n", "    def forward(self, x: torch.Tensor):\n", "        x = self.layer1(x)\n", "        x = self.activation_func(x)\n", "        x = self.layer2(x)\n", "        x = self.activation_func(x)\n", "        x = self.layer3(x)\n", "        x = self.activation_func(x)\n", "        x = self.layer4(x)\n", "        x = self.activation_func(x)\n", "        x = self.layer5(x)\n", "        x = self.activation_func(x)\n", "        # return raw logits: BCEWithLogitsLoss applies the sigmoid internally,\n", "        # so applying it here as well would squash the outputs twice\n", "        x = self.layer6(x)\n", "        return x\n" ] },
{ "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[1000 / 10000] Binary Cross Entropy: 0.6770758628845215\n", "[2000 / 10000] Binary Cross Entropy: 0.6671037077903748\n", "[3000 / 10000] Binary Cross Entropy: 0.6648934483528137\n", "[4000 / 10000] Binary Cross Entropy: 0.6640341281890869\n", "[5000 / 10000] Binary Cross Entropy: 0.663619875907898\n", "[6000 / 10000] Binary Cross Entropy: 0.6633755564689636\n", "[7000 / 10000] Binary Cross Entropy: 0.6631807088851929\n", "[8000 / 10000] Binary Cross Entropy: 0.663043200969696\n", "[9000 / 10000] Binary Cross Entropy: 0.6629269123077393\n", "[10000 / 10000] Binary Cross Entropy: 0.6629060506820679\n" ] } ], "source": [ "# men's training loop\n", "torch.manual_seed(2)\n", "\n", "epochs = 10_000\n", "nigl10k = NiglNN().to(DEVICE)\n", "loss_fn = nn.BCEWithLogitsLoss()\n", "optimizer = optim.Adam(\n", "    nigl10k.parameters(),\n", "    lr=0.001,\n", ")\n", "\n", "for epoch in range(1, epochs + 1):\n", "    optimizer.zero_grad()\n", "    pred = nigl10k(MX_train_T)\n", "    loss = loss_fn(pred, My_train_T)\n", "    loss.backward()\n", "    optimizer.step()\n", "\n", "    if epoch % 1_000 == 0:\n", "        print(f\"[{epoch} / {epochs}] Binary Cross Entropy: {loss.item()}\")\n" ] },
{ "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Binary Cross Entropy: 0.6655928492546082\n" ] } ], "source": [ "# evaluate on the held-out men's test set\n", "nigl10k.eval()\n", "\n", "with torch.no_grad():\n", "    pred = nigl10k(MX_test_T)\n", "    loss = loss_fn(pred, My_test_T)\n", "    print(f\"Binary Cross Entropy: {loss.item()}\")" ] },
{ "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "# save the full model object (loading it later requires the NiglNN class definition)\n", "MODEL_DIR = os.path.join(\"..\", \"models\")\n", "\n", "torch.save(\n", "    nigl10k,\n", "    os.path.join(MODEL_DIR, \"nn10k.pth\"),\n", ")" ] }
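, { "cell_type": "markdown", "metadata": {}, "source": [ "As a quick sanity check (only a sketch, not part of the pipeline above), test-set accuracy can be estimated by passing the model's logits through `torch.sigmoid` and thresholding at 0.5. The cell below reuses the trained `nigl10k` model and the men's test tensors defined earlier." ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# sketch of an accuracy check on the men's test set (assumes the cells above have run)\n", "with torch.no_grad():\n", "    test_logits = nigl10k(MX_test_T)\n", "    # the network returns raw logits, so apply the sigmoid here to get win probabilities\n", "    test_probs = torch.sigmoid(test_logits)\n", "    test_preds = (test_probs >= 0.5).float()\n", "    accuracy = (test_preds == My_test_T).float().mean().item()\n", "\n", "print(f\"Test accuracy: {accuracy:.4f}\")" ] }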
], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.7" } }, "nbformat": 4, "nbformat_minor": 2 }