Spaces:

qisan
/

Depressed_sentimental_analysis

Runtime error

App Files Files Community

liangc40 committed on Jan 11, 2023

Commit

06cad47

1 Parent(s): 3d4e13e

Upload sentimental_analysis_training_pipeline.ipynb

Browse files

Files changed (1) hide show

sentimental_analysis_training_pipeline.ipynb +1094 -0

sentimental_analysis_training_pipeline.ipynb ADDED Viewed

	@@ -0,0 +1,1094 @@

+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    },
+    "accelerator": "GPU",
+    "gpuClass": "standard",
+    "widgets": {
+      "application/vnd.jupyter.widget-state+json": {
+        "23633252c1024924905ec679b76afcff": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HBoxModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_c2388f6069984613b88dc84ddb8e4fde",
+              "IPY_MODEL_49e6c1619fdc4e57baf4d981828fc141",
+              "IPY_MODEL_67459de96a474b3c89d12c259823fe8f"
+            ],
+            "layout": "IPY_MODEL_096988fe730241bca5b4647c3f5ac561"
+          }
+        },
+        "c2388f6069984613b88dc84ddb8e4fde": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_432ca53539984f6f8d38ff46c3afa42c",
+            "placeholder": "",
+            "style": "IPY_MODEL_48d442f8e826410da171ab3c54bee0ee",
+            "value": "Model export complete: 100%"
+          }
+        },
+        "49e6c1619fdc4e57baf4d981828fc141": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "FloatProgressModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "FloatProgressModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ProgressView",
+            "bar_style": "success",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_2571df81b38e490b8752309bd485b91e",
+            "max": 6,
+            "min": 0,
+            "orientation": "horizontal",
+            "style": "IPY_MODEL_02d2d92f6f754d6a9a6b9ed63d5dbed2",
+            "value": 6
+          }
+        },
+        "67459de96a474b3c89d12c259823fe8f": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "HTMLModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_918c8791a4cb4fc08f16f49bbd2cd73f",
+            "placeholder": "",
+            "style": "IPY_MODEL_3058453f9373468d9f09a5867c834d18",
+            "value": " 6/6 [05:03&lt;00:00, 54.56s/it]"
+          }
+        },
+        "096988fe730241bca5b4647c3f5ac561": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "432ca53539984f6f8d38ff46c3afa42c": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "48d442f8e826410da171ab3c54bee0ee": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "2571df81b38e490b8752309bd485b91e": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "02d2d92f6f754d6a9a6b9ed63d5dbed2": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "ProgressStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ProgressStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "bar_color": null,
+            "description_width": ""
+          }
+        },
+        "918c8791a4cb4fc08f16f49bbd2cd73f": {
+          "model_module": "@jupyter-widgets/base",
+          "model_name": "LayoutModel",
+          "model_module_version": "1.2.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "3058453f9373468d9f09a5867c834d18": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_name": "DescriptionStyleModel",
+          "model_module_version": "1.5.0",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        }
+      }
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "KNG3EMWB9woD"
+      },
+      "outputs": [],
+      "source": [
+        "!pip install click==8.0.3\n",
+        "!pip install cloudml_hypertune==0.1.0.dev6\n",
+        "!pip install hypertune==0.0.0\n",
+        "!pip uninstall matplotlib\n",
+        "!pip install matplotlib==3.1.3\n",
+        "!pip install numpy==1.20.3\n",
+        "!pip install pandas==1.3.4\n",
+        "!pip install protobuf==3.19.3\n",
+        "!pip install python-dotenv==0.19.2\n",
+        "!pip install scikit_learn==1.0.2\n",
+        "!pip install torch==1.10.1\n",
+        "!pip install transformers==4.15.0\n",
+        "!pip install hopsworks"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import warnings\n",
+        "warnings.filterwarnings(\"ignore\")"
+      ],
+      "metadata": {
+        "id": "9jQ-nMBYH1mB"
+      },
+      "execution_count": 2,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import hopsworks\n",
+        "project = hopsworks.login()"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "xfOcg7kX_G15",
+        "outputId": "764a5c83-0b44-42fa-ec56-f5fea94c35ed"
+      },
+      "execution_count": 3,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Copy your Api Key (first register/login): https://c.app.hopsworks.ai/account/api/generated\n",
+            "\n",
+            "Paste it here: ··········\n",
+            "Connected. Call `.close()` to terminate connection gracefully.\n",
+            "\n",
+            "Multiple projects found. \n",
+            "\n",
+            "\t (1) liangc40\n",
+            "\t (2) Lab1_for_iris\n",
+            "\n",
+            "Enter project to access: 1\n",
+            "\n",
+            "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/5311\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Load Features from Hopsworks"
+      ],
+      "metadata": {
+        "id": "AS56zXEDCeae"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "fs = project.get_feature_store()\n",
+        "try: \n",
+        "  feature_view = fs.get_feature_view(name=\"sentimental_analysis_feature_group\", version=1)\n",
+        "except:\n",
+        "  fg = fs.get_feature_group(name=\"sentimental_analysis_feature_group\", version=1)\n",
+        "  query = fg.select_all()\n",
+        "  feature_view = fs.create_feature_view(name=\"sentimental_analysis_feature_group\",\n",
+        "                                        version=1,\n",
+        "                                        description=\"Read from pre-processed sentimental analysis dataset\",\n",
+        "                                        labels=[\"label\"],\n",
+        "                                        query=query)  "
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "ck9vNlZj_cRA",
+        "outputId": "1dbcae12-51cf-4a38-d77e-dd94e0201299"
+      },
+      "execution_count": 4,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated since Python 3.3, and in 3.10 it will stop working\n",
+            "DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated since Python 3.3, and in 3.10 it will stop working\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Connected. Call `.close()` to terminate connection gracefully.\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Create DataLoader and TweetsDataset"
+      ],
+      "metadata": {
+        "id": "nts7RyyHCmlJ"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "BATCH_SIZE = 16\n",
+        "MAX_LEN = 160\n",
+        "EPOCHS = 3"
+      ],
+      "metadata": {
+        "id": "zwfWbehIEZWH"
+      },
+      "execution_count": 35,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from torch.utils.data import Dataset, DataLoader\n",
+        "from sklearn.model_selection import train_test_split\n",
+        "import torch\n",
+        "import numpy as np\n",
+        "from transformers import BertModel, BertTokenizer, AdamW, get_linear_schedule_with_warmup\n",
+        "\n",
+        "class TweetsDataset(Dataset):\n",
+        "  def __init__(self, message, depression, tokenizer, max_len):\n",
+        "    self.message = message\n",
+        "    self.depression = depression\n",
+        "    self.tokenizer = tokenizer\n",
+        "    self.max_len = max_len\n",
+        "  \n",
+        "  def __len__(self):\n",
+        "    return len(self.message)\n",
+        "  \n",
+        "  def __getitem__(self, item):\n",
+        "    message = str(self.message[item])\n",
+        "    depression = self.depression[item]\n",
+        "\n",
+        "    encoding = self.tokenizer.encode_plus(\n",
+        "      message,\n",
+        "      add_special_tokens=True,\n",
+        "      max_length=self.max_len,\n",
+        "      return_token_type_ids=False,\n",
+        "      truncation=True,\n",
+        "      pad_to_max_length=True,\n",
+        "      return_attention_mask=True,\n",
+        "      return_tensors='pt',\n",
+        "    )\n",
+        "\n",
+        "    return {\n",
+        "      'tweet_text': message,\n",
+        "      'input_ids': encoding['input_ids'].flatten(),\n",
+        "      'attention_mask': encoding['attention_mask'].flatten(),\n",
+        "      'depression': torch.tensor(depression, dtype=torch.long)\n",
+        "    }"
+      ],
+      "metadata": {
+        "id": "Icpi3iw7CRBu"
+      },
+      "execution_count": 6,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def create_data_loader(message, depression, tokenizer, max_len, batch_size):\n",
+        "  ds = TweetsDataset(\n",
+        "    message = message['message'].to_numpy(),\n",
+        "    depression = depression['label'].to_numpy(),\n",
+        "    tokenizer=tokenizer,\n",
+        "    max_len=max_len\n",
+        "  )\n",
+        "\n",
+        "  return DataLoader(\n",
+        "    ds,\n",
+        "    batch_size = batch_size,\n",
+        "    num_workers = 9\n",
+        "  )"
+      ],
+      "metadata": {
+        "id": "UzKUaFdOCU98"
+      },
+      "execution_count": 22,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "train_message, test_message, train_depression, test_depression = feature_view.train_test_split(0.2)\n",
+        "\n",
+        "#Creating dataloaders\n",
+        "tokenizer = BertTokenizer.from_pretrained('bert-base-cased')\n",
+        "train_data_loader = create_data_loader(train_message, train_depression, tokenizer, MAX_LEN, BATCH_SIZE)\n",
+        "test_data_loader = create_data_loader(test_message, test_depression, tokenizer, MAX_LEN, BATCH_SIZE)\n",
+        "data = next(iter(train_data_loader))"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "QLzTqeQ7DDTs",
+        "outputId": "4c4b73fd-1b23-40a2-ca23-39efcfb9db72"
+      },
+      "execution_count": 23,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "VersionWarning: Incremented version to `39`.\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Bert-Based Depression Classifier Model"
+      ],
+      "metadata": {
+        "id": "dzDl3HR6MRqf"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from torch import nn, optim\n",
+        "import torch.nn.functional as F\n",
+        "import transformers\n",
+        "from transformers import BertModel, BertTokenizer, AdamW, get_linear_schedule_with_warmup\n",
+        "from collections import defaultdict\n",
+        "\n",
+        "class DepressionClassifier(nn.Module):\n",
+        "  def __init__(self, n_classes, pre_trained_model_name):\n",
+        "    super(DepressionClassifier, self).__init__()\n",
+        "    self.bert = BertModel.from_pretrained(pre_trained_model_name)\n",
+        "    self.drop = nn.Dropout(p=0.3)\n",
+        "    self.out = nn.Linear(self.bert.config.hidden_size, n_classes)\n",
+        "\n",
+        "  def forward(self, input_ids, attention_mask):\n",
+        "    _, pooled_output = self.bert(\n",
+        "      input_ids=input_ids,\n",
+        "      attention_mask=attention_mask,\n",
+        "      return_dict = False  # return a plain tuple so it can be unpacked into (_, pooled_output)\n",
+        "    )\n",
+        "    output = self.drop(pooled_output)\n",
+        "    return self.out(output)"
+      ],
+      "metadata": {
+        "id": "frP5Mk_4NvSe"
+      },
+      "execution_count": 24,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "class_names = ['Not Depressed', 'Depressed']\n",
+        "model = DepressionClassifier(len(class_names), 'bert-base-cased')"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "TH0OMDamN32-",
+        "outputId": "3ec8d3f7-1dee-4c0f-f004-37bcc2112a16"
+      },
+      "execution_count": 25,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']\n",
+            "- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+            "- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Training Functions"
+      ],
+      "metadata": {
+        "id": "wpJdcYItKqnN"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from torch import nn, optim\n",
+        "import torch.nn.functional as F\n",
+        "import transformers\n",
+        "from transformers import BertModel, BertTokenizer, AdamW, get_linear_schedule_with_warmup\n",
+        "from collections import defaultdict\n",
+        "import matplotlib.pyplot as plt"
+      ],
+      "metadata": {
+        "id": "czXmMyUzLS7z"
+      },
+      "execution_count": 26,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def train_epoch(model, data_loader, loss_fn, optimizer, device, scheduler, n_examples):\n",
+        "  model = model.train()\n",
+        "\n",
+        "  losses = []\n",
+        "  correct_predictions = 0\n",
+        "  \n",
+        "  for d in data_loader:\n",
+        "    input_ids = d[\"input_ids\"].to(device)\n",
+        "    attention_mask = d[\"attention_mask\"].to(device)\n",
+        "    depression = d[\"depression\"].to(device)\n",
+        "\n",
+        "    outputs = model(\n",
+        "      input_ids = input_ids,\n",
+        "      attention_mask = attention_mask\n",
+        "    )\n",
+        "\n",
+        "    _, preds = torch.max(outputs, dim=1)\n",
+        "    loss = loss_fn(outputs, depression)\n",
+        "\n",
+        "    correct_predictions += torch.sum(preds == depression)\n",
+        "    losses.append(loss.item())\n",
+        "\n",
+        "    loss.backward()\n",
+        "    nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)\n",
+        "    optimizer.step()\n",
+        "    scheduler.step()\n",
+        "    optimizer.zero_grad()\n",
+        "\n",
+        "  return correct_predictions.double() / n_examples, np.mean(losses)"
+      ],
+      "metadata": {
+        "id": "OZ9Ykhx9Kv9X"
+      },
+      "execution_count": 27,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def eval_model(model, data_loader, loss_fn, device, n_examples):\n",
+        "  model = model.eval()\n",
+        "  losses = []\n",
+        "  correct_predictions = 0\n",
+        "\n",
+        "  with torch.no_grad():\n",
+        "    for d in data_loader:\n",
+        "      input_ids = d[\"input_ids\"].to(device)\n",
+        "      attention_mask = d[\"attention_mask\"].to(device)\n",
+        "      depression = d[\"depression\"].to(device)\n",
+        "\n",
+        "      outputs = model(\n",
+        "        input_ids = input_ids,\n",
+        "        attention_mask = attention_mask\n",
+        "      )\n",
+        "      _, preds = torch.max(outputs, dim=1)\n",
+        "\n",
+        "      loss = loss_fn(outputs, depression)\n",
+        "\n",
+        "      correct_predictions += torch.sum(preds == depression)\n",
+        "      losses.append(loss.item())\n",
+        "\n",
+        "  return correct_predictions.double() / n_examples, np.mean(losses)"
+      ],
+      "metadata": {
+        "id": "T6DMQmcrL0t6"
+      },
+      "execution_count": 28,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def loss_accuracy_plots(history):\n",
+        "    plt.figure(1)\n",
+        "    plt.plot(history['train_loss'])\n",
+        "    plt.plot(history['val_loss'])\n",
+        "    plt.xlabel(\"Epochs [-]\")\n",
+        "    plt.ylabel(\"Loss [-]\")\n",
+        "    plt.legend(['Training loss','Validation loss'])\n",
+        "    plt.grid()\n",
+        "    plt.savefig(f\"/content/Training_losses_plot.jpg\")\n",
+        "    plt.figure(2)\n",
+        "    plt.plot(history['train_acc'])\n",
+        "    plt.plot(history['val_acc'])\n",
+        "    plt.xlabel(\"Epochs [-]\")\n",
+        "    plt.ylabel(\"Accuracy [-]\")\n",
+        "    plt.legend(['Training accuracy','Validation accuracy'])\n",
+        "    plt.grid()\n",
+        "    plt.savefig(f\"/content/Training_accuracies_plot.jpg\")"
+      ],
+      "metadata": {
+        "id": "JkAu-va5L34i"
+      },
+      "execution_count": 51,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Training Data"
+      ],
+      "metadata": {
+        "id": "rfslV1NJL7cj"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "gpu_info = !nvidia-smi\n",
+        "gpu_info = '\\n'.join(gpu_info)\n",
+        "if gpu_info.find('failed') >= 0:\n",
+        "  print('Not connected to a GPU')\n",
+        "else:\n",
+        "  print(gpu_info)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "d_vJG_kuQlTw",
+        "outputId": "aff034a1-da7f-4159-f68b-82f6ba10812f"
+      },
+      "execution_count": 31,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Wed Jan 11 10:55:48 2023       \n",
+            "+-----------------------------------------------------------------------------+\n",
+            "| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |\n",
+            "|-------------------------------+----------------------+----------------------+\n",
+            "| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\n",
+            "| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\n",
+            "|                               |                      |               MIG M. |\n",
+            "|===============================+======================+======================|\n",
+            "|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |\n",
+            "| N/A   55C    P0    29W /  70W |  10716MiB / 15109MiB |      0%      Default |\n",
+            "|                               |                      |                  N/A |\n",
+            "+-------------------------------+----------------------+----------------------+\n",
+            "                                                                               \n",
+            "+-----------------------------------------------------------------------------+\n",
+            "| Processes:                                                                  |\n",
+            "|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |\n",
+            "|        ID   ID                                                   Usage      |\n",
+            "|=============================================================================|\n",
+            "+-----------------------------------------------------------------------------+\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n",
+        "model = model.to(device)\n",
+        "input_ids = data['input_ids'].to(device)\n",
+        "attention_mask = data['attention_mask'].to(device)"
+      ],
+      "metadata": {
+        "id": "ly__rDVkRwB2"
+      },
+      "execution_count": 32,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "F.softmax(model(input_ids, attention_mask), dim=1)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "uLoWAKm3Wz8K",
+        "outputId": "d5713105-5c3d-40b4-82e4-5c6766852e5e"
+      },
+      "execution_count": 33,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "tensor([[0.6483, 0.3517],\n",
+              "        [0.7467, 0.2533],\n",
+              "        [0.7182, 0.2818],\n",
+              "        [0.6410, 0.3590],\n",
+              "        [0.4981, 0.5019],\n",
+              "        [0.6323, 0.3677],\n",
+              "        [0.3284, 0.6716],\n",
+              "        [0.6354, 0.3646],\n",
+              "        [0.5387, 0.4613],\n",
+              "        [0.5530, 0.4470],\n",
+              "        [0.5840, 0.4160],\n",
+              "        [0.6082, 0.3918],\n",
+              "        [0.5927, 0.4073],\n",
+              "        [0.5545, 0.4455],\n",
+              "        [0.7305, 0.2695],\n",
+              "        [0.6892, 0.3108]], device='cuda:0', grad_fn=<SoftmaxBackward0>)"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 33
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import gc\n",
+        "gc.collect()\n",
+        "\n",
+        "optimizer = AdamW(model.parameters(), lr = 2e-5, correct_bias = False)\n",
+        "total_steps = len(train_data_loader) * EPOCHS\n",
+        "scheduler = get_linear_schedule_with_warmup(optimizer,\n",
+        "                                                num_warmup_steps = 0,\n",
+        "                                                num_training_steps = total_steps)\n",
+        "\n",
+        "loss_fn = nn.CrossEntropyLoss().to(device)\n",
+        "history = defaultdict(list)\n",
+        "best_accuracy = 0\n",
+        "\n",
+        "for epoch in range(EPOCHS):\n",
+        "  print(f'Epoch {epoch + 1}/{EPOCHS}')\n",
+        "  print('-' * 10)\n",
+        "  \n",
+        "  train_acc, train_loss = train_epoch(model, train_data_loader, loss_fn, optimizer, device, scheduler, len(train_message))\n",
+        "  \n",
+        "  print(f'Train loss {train_loss} accuracy {train_acc}')\n",
+        "  \n",
+        "  val_acc, val_loss = eval_model(model, test_data_loader, loss_fn, device, len(test_message))\n",
+        "  \n",
+        "  print(f'Val   loss {val_loss} accuracy {val_acc}')\n",
+        "  \n",
+        "  history['train_acc'].append(train_acc)\n",
+        "  history['train_loss'].append(train_loss)\n",
+        "  history['val_acc'].append(val_acc)\n",
+        "  history['val_loss'].append(val_loss)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "RKbvtLNnW7dh",
+        "outputId": "1cc22ebc-bf68-4d97-f976-f37d92bc7993"
+      },
+      "execution_count": 41,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 1/3\n",
+            "----------\n",
+            "Train loss 0.032615248548951696 accuracy 0.9951367781155015\n",
+            "Val   loss 0.03613543838475535 accuracy 0.9941662615459407\n",
+            "Epoch 2/3\n",
+            "----------\n",
+            "Train loss 0.021585255281155413 accuracy 0.9958662613981764\n",
+            "Val   loss 0.008615166831007156 accuracy 0.9990277102576568\n",
+            "Epoch 3/3\n",
+            "----------\n",
+            "Train loss 0.003893426973731551 accuracy 0.9993920972644377\n",
+            "Val   loss 0.009192386632538158 accuracy 0.9985415653864851\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from google.colab import drive\n",
+        "drive.mount('/content/drive')\n",
+        "torch.save(model.state_dict(), '/content/drive/MyDrive/data/weights.pth')"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "asNHjpLTZOJQ",
+        "outputId": "bc24d7ab-e05e-451c-dbe4-52328ccf71ac"
+      },
+      "execution_count": 55,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Mounted at /content/drive\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import os\n",
+        "import joblib\n",
+        "from hsml.schema import Schema\n",
+        "from hsml.model_schema import ModelSchema\n",
+        "from sklearn.metrics import classification_report\n",
+        "\n",
+        "# We will now upload our model to the Hopsworks Model Registry. First get an object for the model registry.\n",
+        "mr = project.get_model_registry()\n",
+        "    \n",
+        "# The contents of this directory will be uploaded to the model registry, so create it first if it does not exist.\n",
+        "model_dir=\"sentimental_analysis_model\"\n",
+        "if os.path.isdir(model_dir) == False:\n",
+        "  os.mkdir(model_dir)\n",
+        "\n",
+        "# Save the trained model to 'model_dir', whose contents will be uploaded to the model registry\n",
+        "joblib.dump(model, model_dir + \"/sentimental_analysis_model.pkl\")  \n",
+        "\n",
+        "\n",
+        "# Specify the schema of the model's input/output using the features (train_message) and labels (train_depression)\n",
+        "input_schema = Schema(train_message)\n",
+        "output_schema = Schema(train_depression)\n",
+        "model_schema = ModelSchema(input_schema, output_schema)\n",
+        "\n",
+        "# Create an entry in the model registry that includes the model's name, schema, and description\n",
+        "sentimental_analysis_model = mr.python.create_model(\n",
+        "    name=\"sentimental_analysis_model\", \n",
+        "    model_schema=model_schema,\n",
+        "    description=\"Sentimental Analysis Predictor\"\n",
+        ")\n",
+        "    \n",
+        "# Upload the model to the model registry, including all files in 'model_dir'\n",
+        "sentimental_analysis_model.save(model_dir)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 103,
+          "referenced_widgets": [
+            "23633252c1024924905ec679b76afcff",
+            "c2388f6069984613b88dc84ddb8e4fde",
+            "49e6c1619fdc4e57baf4d981828fc141",
+            "67459de96a474b3c89d12c259823fe8f",
+            "096988fe730241bca5b4647c3f5ac561",
+            "432ca53539984f6f8d38ff46c3afa42c",
+            "48d442f8e826410da171ab3c54bee0ee",
+            "2571df81b38e490b8752309bd485b91e",
+            "02d2d92f6f754d6a9a6b9ed63d5dbed2",
+            "918c8791a4cb4fc08f16f49bbd2cd73f",
+            "3058453f9373468d9f09a5867c834d18"
+          ]
+        },
+        "id": "PNbxNGUimwj8",
+        "outputId": "2e775988-7d2e-46d7-dba7-30896b30f7ac"
+      },
+      "execution_count": 56,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Connected. Call `.close()` to terminate connection gracefully.\n"
+          ]
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "  0%|          | 0/6 [00:00<?, ?it/s]"
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "23633252c1024924905ec679b76afcff"
+            }
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Model created, explore it at https://c.app.hopsworks.ai:443/p/5311/models/sentimental_analysis_model/1\n"
+          ]
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "Model(name: 'sentimental_analysis_model', version: 1)"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 56
+        }
+      ]
+    }
+  ]
+}