{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU", "gpuClass": "standard", "widgets": { "application/vnd.jupyter.widget-state+json": { "23633252c1024924905ec679b76afcff": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_c2388f6069984613b88dc84ddb8e4fde", "IPY_MODEL_49e6c1619fdc4e57baf4d981828fc141", "IPY_MODEL_67459de96a474b3c89d12c259823fe8f" ], "layout": "IPY_MODEL_096988fe730241bca5b4647c3f5ac561" } }, "c2388f6069984613b88dc84ddb8e4fde": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_432ca53539984f6f8d38ff46c3afa42c", "placeholder": "​", "style": "IPY_MODEL_48d442f8e826410da171ab3c54bee0ee", "value": "Model export complete: 100%" } }, "49e6c1619fdc4e57baf4d981828fc141": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", 
"bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_2571df81b38e490b8752309bd485b91e", "max": 6, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_02d2d92f6f754d6a9a6b9ed63d5dbed2", "value": 6 } }, "67459de96a474b3c89d12c259823fe8f": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_918c8791a4cb4fc08f16f49bbd2cd73f", "placeholder": "​", "style": "IPY_MODEL_3058453f9373468d9f09a5867c834d18", "value": " 6/6 [05:03<00:00, 54.56s/it]" } }, "096988fe730241bca5b4647c3f5ac561": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": 
null, "width": null } }, "432ca53539984f6f8d38ff46c3afa42c": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "48d442f8e826410da171ab3c54bee0ee": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "2571df81b38e490b8752309bd485b91e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": 
null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "02d2d92f6f754d6a9a6b9ed63d5dbed2": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "918c8791a4cb4fc08f16f49bbd2cd73f": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, 
"justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3058453f9373468d9f09a5867c834d18": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "id": "KNG3EMWB9woD" }, "outputs": [], "source": [ "!pip install click==8.0.3\n", "!pip install cloudml_hypertune==0.1.0.dev6\n", "!pip install hypertune==0.0.0\n", "!pip uninstall matplotlib\n", "!pip install matplotlib==3.1.3\n", "!pip install numpy==1.20.3\n", "!pip install pandas==1.3.4\n", "!pip install protobuf==3.19.3\n", "!pip install python-dotenv==0.19.2\n", "!pip install cikit_learn==1.0.2\n", "!pip install torch==1.10.1\n", "!pip install transformers==4.15.0\n", "!pip install hopsworks" ] }, { "cell_type": "code", "source": [ "import warnings\n", "warnings.filterwarnings(\"ignore\")" ], "metadata": { "id": "9jQ-nMBYH1mB" }, "execution_count": 2, "outputs": [] }, { "cell_type": "code", "source": [ "import hopsworks\n", "project = hopsworks.login()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "xfOcg7kX_G15", "outputId": "764a5c83-0b44-42fa-ec56-f5fea94c35ed" }, "execution_count": 3, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Copy your Api Key (first register/login): https://c.app.hopsworks.ai/account/api/generated\n", "\n", "Paste it here: ··········\n", "Connected. 
Call `.close()` to terminate connection gracefully.\n", "\n", "Multiple projects found. \n", "\n", "\t (1) liangc40\n", "\t (2) Lab1_for_iris\n", "\n", "Enter project to access: 1\n", "\n", "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/5311\n" ] } ] }, { "cell_type": "markdown", "source": [ "## Load Feature from Hopsworks" ], "metadata": { "id": "AS56zXEDCeae" } }, { "cell_type": "code", "source": [ "fs = project.get_feature_store()\n", "try: \n", " feature_view = fs.get_feature_view(name=\"sentimental_analysis_feature_group\", version=1)\n", "except:\n", " fg = fs.get_feature_group(name=\"sentimental_analysis_feature_group\", version=1)\n", " query = fg.select_all()\n", " feature_view = fs.create_feature_view(name=\"sentimental_analysis_feature_group\",\n", " version=1,\n", " description=\"Read from pre-processed sentimental analysis dataset\",\n", " labels=[\"label\"],\n", " query=query) " ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ck9vNlZj_cRA", "outputId": "1dbcae12-51cf-4a38-d77e-dd94e0201299" }, "execution_count": 4, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated since Python 3.3, and in 3.10 it will stop working\n", "DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated since Python 3.3, and in 3.10 it will stop working\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "Connected. 
# --- Hyperparameters ---------------------------------------------------------
BATCH_SIZE = 16
MAX_LEN = 160   # max token length per tweet; longer inputs are truncated
EPOCHS = 3

from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import torch
import numpy as np
from transformers import BertModel, BertTokenizer, AdamW, get_linear_schedule_with_warmup


class TweetsDataset(Dataset):
    """Torch Dataset wrapping tweet texts and depression labels as BERT inputs.

    Each item is a dict with the raw text, flattened `input_ids` and
    `attention_mask` tensors (length `max_len`), and the long-tensor label.
    """

    def __init__(self, message, depression, tokenizer, max_len):
        self.message = message        # array-like of raw tweet strings
        self.depression = depression  # array-like of integer labels
        self.tokenizer = tokenizer    # a BertTokenizer (or compatible) instance
        self.max_len = max_len        # fixed sequence length after pad/truncate

    def __len__(self):
        return len(self.message)

    def __getitem__(self, item):
        message = str(self.message[item])
        depression = self.depression[item]

        encoding = self.tokenizer.encode_plus(
            message,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            truncation=True,
            # FIX: `pad_to_max_length=True` is deprecated in transformers 4.x
            # and emits a FutureWarning; `padding='max_length'` is the
            # documented equivalent (same output, no warning).
            padding='max_length',
            return_attention_mask=True,
            return_tensors='pt',
        )

        return {
            'tweet_text': message,
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'depression': torch.tensor(depression, dtype=torch.long),
        }


def create_data_loader(message, depression, tokenizer, max_len, batch_size):
    """Build a DataLoader over (message, label) DataFrames.

    `message` must have a 'message' column and `depression` a 'label' column.
    """
    ds = TweetsDataset(
        message=message['message'].to_numpy(),
        depression=depression['label'].to_numpy(),
        tokenizer=tokenizer,
        max_len=max_len,
    )
    # NOTE(review): num_workers=9 likely exceeds Colab's CPU count — confirm.
    return DataLoader(ds, batch_size=batch_size, num_workers=9)


# Materialise the train/test split from the feature view.
train_message, test_message, train_depression, test_depression = feature_view.train_test_split(0.2)

# Creating dataloaders
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
train_data_loader = create_data_loader(train_message, train_depression, tokenizer, MAX_LEN, BATCH_SIZE)
test_data_loader = create_data_loader(test_message, test_depression, tokenizer, MAX_LEN, BATCH_SIZE)
data = next(iter(train_data_loader))

# --- BERT-based depression classifier model ----------------------------------
from torch import nn, optim
import torch.nn.functional as F
import transformers
from transformers import BertModel, BertTokenizer, AdamW, get_linear_schedule_with_warmup
from collections import defaultdict


class DepressionClassifier(nn.Module):
    """Pretrained BERT encoder + dropout + linear head for binary classification."""

    def __init__(self, n_classes, pre_trained_model_name):
        super(DepressionClassifier, self).__init__()
        self.bert = BertModel.from_pretrained(pre_trained_model_name)
        self.drop = nn.Dropout(p=0.3)
        self.out = nn.Linear(self.bert.config.hidden_size, n_classes)

    def forward(self, input_ids, attention_mask):
        # return_dict=False makes the call return a (sequence_output,
        # pooled_output) tuple instead of a ModelOutput object.
        _, pooled_output = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            return_dict=False,
        )
        output = self.drop(pooled_output)
        return self.out(output)
"cell_type": "code", "source": [ "class_names = ['Not Depressed', 'Depressed']\n", "model = DepressionClassifier(len(class_names), 'bert-base-cased')" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "TH0OMDamN32-", "outputId": "3ec8d3f7-1dee-4c0f-f004-37bcc2112a16" }, "execution_count": 25, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']\n", "- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n" ] } ] }, { "cell_type": "markdown", "source": [ "## Training Functions" ], "metadata": { "id": "wpJdcYItKqnN" } }, { "cell_type": "code", "source": [ "from torch import nn, optim\n", "import torch.nn.functional as F\n", "import transformers\n", "from transformers import BertModel, BertTokenizer, AdamW, get_linear_schedule_with_warmup\n", "from collections import defaultdict\n", "import matplotlib.pyplot as plt" ], "metadata": { "id": "czXmMyUzLS7z" }, "execution_count": 26, "outputs": [] }, { "cell_type": "code", "source": [ "def train_epoch(model, data_loader, loss_fn, optimizer, device, scheduler, n_examples):\n", " model = model.train()\n", "\n", " losses = []\n", " correct_predictions = 0\n", " \n", " for d in data_loader:\n", " input_ids = 
d[\"input_ids\"].to(device)\n", " attention_mask = d[\"attention_mask\"].to(device)\n", " depression = d[\"depression\"].to(device)\n", "\n", " outputs = model(\n", " input_ids = input_ids,\n", " attention_mask = attention_mask\n", " )\n", "\n", " _, preds = torch.max(outputs, dim=1)\n", " loss = loss_fn(outputs, depression)\n", "\n", " correct_predictions += torch.sum(preds == depression)\n", " losses.append(loss.item())\n", "\n", " loss.backward()\n", " nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)\n", " optimizer.step()\n", " scheduler.step()\n", " optimizer.zero_grad()\n", "\n", " return correct_predictions.double() / n_examples, np.mean(losses)" ], "metadata": { "id": "OZ9Ykhx9Kv9X" }, "execution_count": 27, "outputs": [] }, { "cell_type": "code", "source": [ "def eval_model(model, data_loader, loss_fn, device, n_examples):\n", " model = model.eval()\n", " losses = []\n", " correct_predictions = 0\n", "\n", " with torch.no_grad():\n", " for d in data_loader:\n", " input_ids = d[\"input_ids\"].to(device)\n", " attention_mask = d[\"attention_mask\"].to(device)\n", " depression = d[\"depression\"].to(device)\n", "\n", " outputs = model(\n", " input_ids = input_ids,\n", " attention_mask = attention_mask\n", " )\n", " _, preds = torch.max(outputs, dim=1)\n", "\n", " loss = loss_fn(outputs, depression)\n", "\n", " correct_predictions += torch.sum(preds == depression)\n", " losses.append(loss.item())\n", "\n", " return correct_predictions.double() / n_examples, np.mean(losses)" ], "metadata": { "id": "T6DMQmcrL0t6" }, "execution_count": 28, "outputs": [] }, { "cell_type": "code", "source": [ "def loss_accuracy_plots(history):\n", " plt.figure(1)\n", " plt.plot(history['train_loss'])\n", " plt.plot(history['val_loss'])\n", " plt.xlabel(\"Epochs [-]\")\n", " plt.ylabel(\"Loss [-]\")\n", " plt.legend(['Training loss','Validation loss'])\n", " plt.grid()\n", " plt.savefig(f\"/content/Training_losses_plot.jpg\")\n", " plt.figure(2)\n", " 
plt.plot(history['train_acc'])\n", " plt.plot(history['val_acc'])\n", " plt.xlabel(\"Epochs [-]\")\n", " plt.ylabel(\"Loss [-]\")\n", " plt.legend(['Training accuracy','Validation accuracy'])\n", " plt.grid()\n", " plt.savefig(f\"/content/Training_accuracies_plot.jpg\")" ], "metadata": { "id": "JkAu-va5L34i" }, "execution_count": 51, "outputs": [] }, { "cell_type": "markdown", "source": [ "## Training Data" ], "metadata": { "id": "rfslV1NJL7cj" } }, { "cell_type": "code", "source": [ "gpu_info = !nvidia-smi\n", "gpu_info = '\\n'.join(gpu_info)\n", "if gpu_info.find('failed') >= 0:\n", " print('Not connected to a GPU')\n", "else:\n", " print(gpu_info)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "d_vJG_kuQlTw", "outputId": "aff034a1-da7f-4159-f68b-82f6ba10812f" }, "execution_count": 31, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Wed Jan 11 10:55:48 2023 \n", "+-----------------------------------------------------------------------------+\n", "| NVIDIA-SMI 460.32.03 Driver Version: 460.32.03 CUDA Version: 11.2 |\n", "|-------------------------------+----------------------+----------------------+\n", "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n", "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", "| | | MIG M. 
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)
input_ids = data['input_ids'].to(device)
attention_mask = data['attention_mask'].to(device)

# Sanity check: class probabilities for one (still untrained) batch.
F.softmax(model(input_ids, attention_mask), dim=1)

import gc
gc.collect()

# NOTE(review): `AdamW` here comes from transformers (deprecated in later
# versions in favour of torch.optim.AdamW) — kept for pinned 4.15.0.
optimizer = AdamW(model.parameters(), lr=2e-5, correct_bias=False)
total_steps = len(train_data_loader) * EPOCHS
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=total_steps,
)

loss_fn = nn.CrossEntropyLoss().to(device)
history = defaultdict(list)
best_accuracy = 0

for epoch in range(EPOCHS):
    print(f'Epoch {epoch + 1}/{EPOCHS}')
    print('-' * 10)

    train_acc, train_loss = train_epoch(model, train_data_loader, loss_fn, optimizer, device, scheduler, len(train_message))
    print(f'Train loss {train_loss} accuracy {train_acc}')

    val_acc, val_loss = eval_model(model, test_data_loader, loss_fn, device, len(test_message))
    print(f'Val loss {val_loss} accuracy {val_acc}')

    # BUG FIX: the accuracies are 0-dim CUDA tensors; storing them raw makes
    # loss_accuracy_plots() fail (matplotlib cannot convert CUDA tensors to
    # numpy). Store plain floats instead.
    history['train_acc'].append(float(train_acc))
    history['train_loss'].append(train_loss)
    history['val_acc'].append(float(val_acc))
    history['val_loss'].append(val_loss)
    # BUG FIX: best_accuracy was initialised but never updated (dead code);
    # track the best validation accuracy across epochs.
    best_accuracy = max(best_accuracy, float(val_acc))

# Persist the fine-tuned weights to Google Drive.
from google.colab import drive
drive.mount('/content/drive')
torch.save(model.state_dict(), '/content/drive/MyDrive/data/weights.pth')
import os
import joblib
from hsml.schema import Schema
from hsml.model_schema import ModelSchema
from sklearn.metrics import classification_report

# Upload the trained model to the Hopsworks Model Registry.
mr = project.get_model_registry()

# The contents of this directory will be uploaded to the model registry.
model_dir = "sentimental_analysis_model"
# FIX: replaces `if os.path.isdir(model_dir) == False: os.mkdir(...)` —
# makedirs with exist_ok=True is idiomatic and avoids the check/create race.
os.makedirs(model_dir, exist_ok=True)

# Serialise the whole model object into model_dir.
joblib.dump(model, model_dir + "/sentimental_analysis_model.pkl")

# Schema of the model's input/output, derived from the training split frames.
input_schema = Schema(train_message)
output_schema = Schema(train_depression)
model_schema = ModelSchema(input_schema, output_schema)

# Create the registry entry (name, schema, description).
sentimental_analysis_model = mr.python.create_model(
    name="sentimental_analysis_model",
    model_schema=model_schema,
    description="Sentimental Analysis Predictor",
)

# Upload all files under model_dir to the registry.
sentimental_analysis_model.save(model_dir)
"918c8791a4cb4fc08f16f49bbd2cd73f", "3058453f9373468d9f09a5867c834d18" ] }, "id": "PNbxNGUimwj8", "outputId": "2e775988-7d2e-46d7-dba7-30896b30f7ac" }, "execution_count": 56, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Connected. Call `.close()` to terminate connection gracefully.\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ " 0%| | 0/6 [00:00