{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU", "gpuClass": "standard", "widgets": { "application/vnd.jupyter.widget-state+json": { "23633252c1024924905ec679b76afcff": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_c2388f6069984613b88dc84ddb8e4fde", "IPY_MODEL_49e6c1619fdc4e57baf4d981828fc141", "IPY_MODEL_67459de96a474b3c89d12c259823fe8f" ], "layout": "IPY_MODEL_096988fe730241bca5b4647c3f5ac561" } }, "c2388f6069984613b88dc84ddb8e4fde": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_432ca53539984f6f8d38ff46c3afa42c", "placeholder": "​", "style": "IPY_MODEL_48d442f8e826410da171ab3c54bee0ee", "value": "Model export complete: 100%" } }, "49e6c1619fdc4e57baf4d981828fc141": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", 
"bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_2571df81b38e490b8752309bd485b91e", "max": 6, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_02d2d92f6f754d6a9a6b9ed63d5dbed2", "value": 6 } }, "67459de96a474b3c89d12c259823fe8f": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_918c8791a4cb4fc08f16f49bbd2cd73f", "placeholder": "​", "style": "IPY_MODEL_3058453f9373468d9f09a5867c834d18", "value": " 6/6 [05:03<00:00, 54.56s/it]" } }, "096988fe730241bca5b4647c3f5ac561": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": 
null, "width": null } }, "432ca53539984f6f8d38ff46c3afa42c": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "48d442f8e826410da171ab3c54bee0ee": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "2571df81b38e490b8752309bd485b91e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": 
null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "02d2d92f6f754d6a9a6b9ed63d5dbed2": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "918c8791a4cb4fc08f16f49bbd2cd73f": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, 
"justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3058453f9373468d9f09a5867c834d18": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "id": "KNG3EMWB9woD" }, "outputs": [], "source": [ "!pip install click==8.0.3\n", "!pip install cloudml_hypertune==0.1.0.dev6\n", "!pip install hypertune==0.0.0\n", "!pip uninstall matplotlib\n", "!pip install matplotlib==3.1.3\n", "!pip install numpy==1.20.3\n", "!pip install pandas==1.3.4\n", "!pip install protobuf==3.19.3\n", "!pip install python-dotenv==0.19.2\n", "!pip install cikit_learn==1.0.2\n", "!pip install torch==1.10.1\n", "!pip install transformers==4.15.0\n", "!pip install hopsworks" ] }, { "cell_type": "code", "source": [ "import warnings\n", "warnings.filterwarnings(\"ignore\")" ], "metadata": { "id": "9jQ-nMBYH1mB" }, "execution_count": 2, "outputs": [] }, { "cell_type": "code", "source": [ "import hopsworks\n", "project = hopsworks.login()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "xfOcg7kX_G15", "outputId": "764a5c83-0b44-42fa-ec56-f5fea94c35ed" }, "execution_count": 3, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Copy your Api Key (first register/login): https://c.app.hopsworks.ai/account/api/generated\n", "\n", "Paste it here: ··········\n", "Connected. 
Call `.close()` to terminate connection gracefully.\n", "\n", "Multiple projects found. \n", "\n", "\t (1) liangc40\n", "\t (2) Lab1_for_iris\n", "\n", "Enter project to access: 1\n", "\n", "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/5311\n" ] } ] }, { "cell_type": "markdown", "source": [ "## Load Feature from Hopsworks" ], "metadata": { "id": "AS56zXEDCeae" } }, { "cell_type": "code", "source": [ "fs = project.get_feature_store()\n", "try: \n", " feature_view = fs.get_feature_view(name=\"sentimental_analysis_feature_group\", version=1)\n", "except:\n", " fg = fs.get_feature_group(name=\"sentimental_analysis_feature_group\", version=1)\n", " query = fg.select_all()\n", " feature_view = fs.create_feature_view(name=\"sentimental_analysis_feature_group\",\n", " version=1,\n", " description=\"Read from pre-processed sentimental analysis dataset\",\n", " labels=[\"label\"],\n", " query=query) " ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ck9vNlZj_cRA", "outputId": "1dbcae12-51cf-4a38-d77e-dd94e0201299" }, "execution_count": 4, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated since Python 3.3, and in 3.10 it will stop working\n", "DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated since Python 3.3, and in 3.10 it will stop working\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "Connected. 
# --- Hyperparameters ---------------------------------------------------------
BATCH_SIZE = 16
MAX_LEN = 160   # max token length per tweet; longer inputs are truncated
EPOCHS = 3

from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import torch
import numpy as np
from transformers import BertModel, BertTokenizer, AdamW, get_linear_schedule_with_warmup


class TweetsDataset(Dataset):
    """Torch Dataset wrapping tweet texts and depression labels as BERT inputs.

    Each item is a dict with the raw text, flattened `input_ids` and
    `attention_mask` tensors (length `max_len`), and the long-tensor label.
    """

    def __init__(self, message, depression, tokenizer, max_len):
        self.message = message        # array-like of raw tweet strings
        self.depression = depression  # array-like of integer labels
        self.tokenizer = tokenizer    # a BertTokenizer (or compatible) instance
        self.max_len = max_len        # fixed sequence length after pad/truncate

    def __len__(self):
        return len(self.message)

    def __getitem__(self, item):
        message = str(self.message[item])
        depression = self.depression[item]

        encoding = self.tokenizer.encode_plus(
            message,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            truncation=True,
            # FIX: `pad_to_max_length=True` is deprecated in transformers 4.x
            # and emits a FutureWarning; `padding='max_length'` is the
            # documented equivalent (same output, no warning).
            padding='max_length',
            return_attention_mask=True,
            return_tensors='pt',
        )

        return {
            'tweet_text': message,
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'depression': torch.tensor(depression, dtype=torch.long),
        }


def create_data_loader(message, depression, tokenizer, max_len, batch_size):
    """Build a DataLoader over (message, label) DataFrames.

    `message` must have a 'message' column and `depression` a 'label' column.
    """
    ds = TweetsDataset(
        message=message['message'].to_numpy(),
        depression=depression['label'].to_numpy(),
        tokenizer=tokenizer,
        max_len=max_len,
    )
    # NOTE(review): num_workers=9 likely exceeds Colab's CPU count — confirm.
    return DataLoader(ds, batch_size=batch_size, num_workers=9)


# Materialise the train/test split from the feature view.
train_message, test_message, train_depression, test_depression = feature_view.train_test_split(0.2)

# Creating dataloaders
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
train_data_loader = create_data_loader(train_message, train_depression, tokenizer, MAX_LEN, BATCH_SIZE)
test_data_loader = create_data_loader(test_message, test_depression, tokenizer, MAX_LEN, BATCH_SIZE)
data = next(iter(train_data_loader))

# --- BERT-based depression classifier model ----------------------------------
from torch import nn, optim
import torch.nn.functional as F
import transformers
from transformers import BertModel, BertTokenizer, AdamW, get_linear_schedule_with_warmup
from collections import defaultdict


class DepressionClassifier(nn.Module):
    """Pretrained BERT encoder + dropout + linear head for binary classification."""

    def __init__(self, n_classes, pre_trained_model_name):
        super(DepressionClassifier, self).__init__()
        self.bert = BertModel.from_pretrained(pre_trained_model_name)
        self.drop = nn.Dropout(p=0.3)
        self.out = nn.Linear(self.bert.config.hidden_size, n_classes)

    def forward(self, input_ids, attention_mask):
        # return_dict=False makes the call return a (sequence_output,
        # pooled_output) tuple instead of a ModelOutput object.
        _, pooled_output = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            return_dict=False,
        )
        output = self.drop(pooled_output)
        return self.out(output)
"cell_type": "code", "source": [ "class_names = ['Not Depressed', 'Depressed']\n", "model = DepressionClassifier(len(class_names), 'bert-base-cased')" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "TH0OMDamN32-", "outputId": "3ec8d3f7-1dee-4c0f-f004-37bcc2112a16" }, "execution_count": 25, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']\n", "- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n" ] } ] }, { "cell_type": "markdown", "source": [ "## Training Functions" ], "metadata": { "id": "wpJdcYItKqnN" } }, { "cell_type": "code", "source": [ "from torch import nn, optim\n", "import torch.nn.functional as F\n", "import transformers\n", "from transformers import BertModel, BertTokenizer, AdamW, get_linear_schedule_with_warmup\n", "from collections import defaultdict\n", "import matplotlib.pyplot as plt" ], "metadata": { "id": "czXmMyUzLS7z" }, "execution_count": 26, "outputs": [] }, { "cell_type": "code", "source": [ "def train_epoch(model, data_loader, loss_fn, optimizer, device, scheduler, n_examples):\n", " model = model.train()\n", "\n", " losses = []\n", " correct_predictions = 0\n", " \n", " for d in data_loader:\n", " input_ids = 
d[\"input_ids\"].to(device)\n", " attention_mask = d[\"attention_mask\"].to(device)\n", " depression = d[\"depression\"].to(device)\n", "\n", " outputs = model(\n", " input_ids = input_ids,\n", " attention_mask = attention_mask\n", " )\n", "\n", " _, preds = torch.max(outputs, dim=1)\n", " loss = loss_fn(outputs, depression)\n", "\n", " correct_predictions += torch.sum(preds == depression)\n", " losses.append(loss.item())\n", "\n", " loss.backward()\n", " nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)\n", " optimizer.step()\n", " scheduler.step()\n", " optimizer.zero_grad()\n", "\n", " return correct_predictions.double() / n_examples, np.mean(losses)" ], "metadata": { "id": "OZ9Ykhx9Kv9X" }, "execution_count": 27, "outputs": [] }, { "cell_type": "code", "source": [ "def eval_model(model, data_loader, loss_fn, device, n_examples):\n", " model = model.eval()\n", " losses = []\n", " correct_predictions = 0\n", "\n", " with torch.no_grad():\n", " for d in data_loader:\n", " input_ids = d[\"input_ids\"].to(device)\n", " attention_mask = d[\"attention_mask\"].to(device)\n", " depression = d[\"depression\"].to(device)\n", "\n", " outputs = model(\n", " input_ids = input_ids,\n", " attention_mask = attention_mask\n", " )\n", " _, preds = torch.max(outputs, dim=1)\n", "\n", " loss = loss_fn(outputs, depression)\n", "\n", " correct_predictions += torch.sum(preds == depression)\n", " losses.append(loss.item())\n", "\n", " return correct_predictions.double() / n_examples, np.mean(losses)" ], "metadata": { "id": "T6DMQmcrL0t6" }, "execution_count": 28, "outputs": [] }, { "cell_type": "code", "source": [ "def loss_accuracy_plots(history):\n", " plt.figure(1)\n", " plt.plot(history['train_loss'])\n", " plt.plot(history['val_loss'])\n", " plt.xlabel(\"Epochs [-]\")\n", " plt.ylabel(\"Loss [-]\")\n", " plt.legend(['Training loss','Validation loss'])\n", " plt.grid()\n", " plt.savefig(f\"/content/Training_losses_plot.jpg\")\n", " plt.figure(2)\n", " 
plt.plot(history['train_acc'])\n", " plt.plot(history['val_acc'])\n", " plt.xlabel(\"Epochs [-]\")\n", " plt.ylabel(\"Loss [-]\")\n", " plt.legend(['Training accuracy','Validation accuracy'])\n", " plt.grid()\n", " plt.savefig(f\"/content/Training_accuracies_plot.jpg\")" ], "metadata": { "id": "JkAu-va5L34i" }, "execution_count": 51, "outputs": [] }, { "cell_type": "markdown", "source": [ "## Training Data" ], "metadata": { "id": "rfslV1NJL7cj" } }, { "cell_type": "code", "source": [ "gpu_info = !nvidia-smi\n", "gpu_info = '\\n'.join(gpu_info)\n", "if gpu_info.find('failed') >= 0:\n", " print('Not connected to a GPU')\n", "else:\n", " print(gpu_info)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "d_vJG_kuQlTw", "outputId": "aff034a1-da7f-4159-f68b-82f6ba10812f" }, "execution_count": 31, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Wed Jan 11 10:55:48 2023 \n", "+-----------------------------------------------------------------------------+\n", "| NVIDIA-SMI 460.32.03 Driver Version: 460.32.03 CUDA Version: 11.2 |\n", "|-------------------------------+----------------------+----------------------+\n", "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n", "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", "| | | MIG M. 
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)
input_ids = data['input_ids'].to(device)
attention_mask = data['attention_mask'].to(device)

# Sanity check: class probabilities for one (still untrained) batch.
F.softmax(model(input_ids, attention_mask), dim=1)

import gc
gc.collect()

# NOTE(review): `AdamW` here comes from transformers (deprecated in later
# versions in favour of torch.optim.AdamW) — kept for pinned 4.15.0.
optimizer = AdamW(model.parameters(), lr=2e-5, correct_bias=False)
total_steps = len(train_data_loader) * EPOCHS
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=total_steps,
)

loss_fn = nn.CrossEntropyLoss().to(device)
history = defaultdict(list)
best_accuracy = 0

for epoch in range(EPOCHS):
    print(f'Epoch {epoch + 1}/{EPOCHS}')
    print('-' * 10)

    train_acc, train_loss = train_epoch(model, train_data_loader, loss_fn, optimizer, device, scheduler, len(train_message))
    print(f'Train loss {train_loss} accuracy {train_acc}')

    val_acc, val_loss = eval_model(model, test_data_loader, loss_fn, device, len(test_message))
    print(f'Val loss {val_loss} accuracy {val_acc}')

    # BUG FIX: the accuracies are 0-dim CUDA tensors; storing them raw makes
    # loss_accuracy_plots() fail (matplotlib cannot convert CUDA tensors to
    # numpy). Store plain floats instead.
    history['train_acc'].append(float(train_acc))
    history['train_loss'].append(train_loss)
    history['val_acc'].append(float(val_acc))
    history['val_loss'].append(val_loss)
    # BUG FIX: best_accuracy was initialised but never updated (dead code);
    # track the best validation accuracy across epochs.
    best_accuracy = max(best_accuracy, float(val_acc))

# Persist the fine-tuned weights to Google Drive.
from google.colab import drive
drive.mount('/content/drive')
torch.save(model.state_dict(), '/content/drive/MyDrive/data/weights.pth')
import os
import joblib
from hsml.schema import Schema
from hsml.model_schema import ModelSchema
from sklearn.metrics import classification_report

# Upload the trained model to the Hopsworks Model Registry.
mr = project.get_model_registry()

# The contents of this directory will be uploaded to the model registry.
model_dir = "sentimental_analysis_model"
# FIX: replaces `if os.path.isdir(model_dir) == False: os.mkdir(...)` —
# makedirs with exist_ok=True is idiomatic and avoids the check/create race.
os.makedirs(model_dir, exist_ok=True)

# Serialise the whole model object into model_dir.
joblib.dump(model, model_dir + "/sentimental_analysis_model.pkl")

# Schema of the model's input/output, derived from the training split frames.
input_schema = Schema(train_message)
output_schema = Schema(train_depression)
model_schema = ModelSchema(input_schema, output_schema)

# Create the registry entry (name, schema, description).
sentimental_analysis_model = mr.python.create_model(
    name="sentimental_analysis_model",
    model_schema=model_schema,
    description="Sentimental Analysis Predictor",
)

# Upload all files under model_dir to the registry.
sentimental_analysis_model.save(model_dir)
"918c8791a4cb4fc08f16f49bbd2cd73f", "3058453f9373468d9f09a5867c834d18" ] }, "id": "PNbxNGUimwj8", "outputId": "2e775988-7d2e-46d7-dba7-30896b30f7ac" }, "execution_count": 56, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Connected. Call `.close()` to terminate connection gracefully.\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ " 0%| | 0/6 [00:00