jbraha commited on
Commit
369c9ca
1 Parent(s): f2a478c

working trainer

.ipynb_checkpoints/Copy_of_Copy_of_training-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
Copy_of_Copy_of_training.ipynb ADDED
@@ -0,0 +1,345 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 2,
6
+ "id": "215a1aae",
7
+ "metadata": {
8
+ "id": "215a1aae"
9
+ },
10
+ "outputs": [
11
+ {
12
+ "name": "stderr",
13
+ "output_type": "stream",
14
+ "text": [
15
+ "2023-04-23 21:39:14.489766: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
16
+ "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
17
+ "2023-04-23 21:39:15.104927: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
18
+ ]
19
+ }
20
+ ],
21
+ "source": [
22
+ "import torch\n",
23
+ "from torch.utils.data import Dataset, DataLoader\n",
24
+ "\n",
25
+ "import pandas as pd\n",
26
+ "\n",
27
+ "from transformers import BertTokenizerFast, BertForSequenceClassification\n",
28
+ "from transformers import Trainer, TrainingArguments"
29
+ ]
30
+ },
31
+ {
32
+ "cell_type": "code",
33
+ "execution_count": 3,
34
+ "id": "J5Tlgp4tNd0U",
35
+ "metadata": {
36
+ "colab": {
37
+ "base_uri": "https://localhost:8080/"
38
+ },
39
+ "id": "J5Tlgp4tNd0U",
40
+ "outputId": "f2eef2ee-7d9d-4f5b-e35c-e6015e68f59e"
41
+ },
42
+ "outputs": [
43
+ {
44
+ "name": "stderr",
45
+ "output_type": "stream",
46
+ "text": [
47
+ "Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight']\n",
48
+ "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
49
+ "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
50
+ "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
51
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
52
+ ]
53
+ }
54
+ ],
55
+ "source": [
56
+ "model_name = \"bert-base-uncased\"\n",
57
+ "tokenizer = BertTokenizerFast.from_pretrained(model_name)\n",
58
+ "model = BertForSequenceClassification.from_pretrained(model_name, num_labels=6)\n",
59
+ "model = model.to(\"cuda:0\")\n",
60
+ "max_len = 200\n",
61
+ "\n",
62
+ "training_args = TrainingArguments(\n",
63
+ " output_dir=\"results\",\n",
64
+ " num_train_epochs=1,\n",
65
+ " per_device_train_batch_size=16,\n",
66
+ " per_device_eval_batch_size=64,\n",
67
+ " warmup_steps=500,\n",
68
+ " learning_rate=5e-5,\n",
69
+ " weight_decay=0.01,\n",
70
+ " logging_dir=\"./logs\",\n",
71
+ " logging_steps=10\n",
72
+ " )\n",
73
+ "\n",
74
+ "# dataset class that inherits from torch.utils.data.Dataset\n",
75
+ "\n",
76
+ " \n",
77
+ "class TokenizerDataset(Dataset):\n",
78
+ " def __init__(self, strings):\n",
79
+ " self.strings = strings\n",
80
+ " \n",
81
+ " def __getitem__(self, idx):\n",
82
+ " return self.strings[idx]\n",
83
+ " \n",
84
+ " def __len__(self):\n",
85
+ " return len(self.strings)\n",
86
+ " "
87
+ ]
88
+ },
89
+ {
90
+ "cell_type": "code",
91
+ "execution_count": 4,
92
+ "id": "9969c58c",
93
+ "metadata": {
94
+ "colab": {
95
+ "base_uri": "https://localhost:8080/"
96
+ },
97
+ "id": "9969c58c",
98
+ "outputId": "5933b10b-9ddb-4b67-b66b-589207bef2d3",
99
+ "scrolled": false
100
+ },
101
+ "outputs": [
102
+ {
103
+ "name": "stdout",
104
+ "output_type": "stream",
105
+ "text": [
106
+ " id comment_text \\\n",
107
+ "0 0000997932d777bf Explanation\\nWhy the edits made under my usern... \n",
108
+ "1 000103f0d9cfb60f D'aww! He matches this background colour I'm s... \n",
109
+ "2 000113f07ec002fd Hey man, I'm really not trying to edit war. It... \n",
110
+ "3 0001b41b1c6bb37e \"\\nMore\\nI can't make any real suggestions on ... \n",
111
+ "4 0001d958c54c6e35 You, sir, are my hero. Any chance you remember... \n",
112
+ "... ... ... \n",
113
+ "159566 ffe987279560d7ff \":::::And for the second time of asking, when ... \n",
114
+ "159567 ffea4adeee384e90 You should be ashamed of yourself \\n\\nThat is ... \n",
115
+ "159568 ffee36eab5c267c9 Spitzer \\n\\nUmm, theres no actual article for ... \n",
116
+ "159569 fff125370e4aaaf3 And it looks like it was actually you who put ... \n",
117
+ "159570 fff46fc426af1f9a \"\\nAnd ... I really don't think you understand... \n",
118
+ "\n",
119
+ " toxic severe_toxic obscene threat insult identity_hate \n",
120
+ "0 0 0 0 0 0 0 \n",
121
+ "1 0 0 0 0 0 0 \n",
122
+ "2 0 0 0 0 0 0 \n",
123
+ "3 0 0 0 0 0 0 \n",
124
+ "4 0 0 0 0 0 0 \n",
125
+ "... ... ... ... ... ... ... \n",
126
+ "159566 0 0 0 0 0 0 \n",
127
+ "159567 0 0 0 0 0 0 \n",
128
+ "159568 0 0 0 0 0 0 \n",
129
+ "159569 0 0 0 0 0 0 \n",
130
+ "159570 0 0 0 0 0 0 \n",
131
+ "\n",
132
+ "[159571 rows x 8 columns]\n"
133
+ ]
134
+ }
135
+ ],
136
+ "source": [
137
+ "train_data = pd.read_csv(\"data/train.csv\")\n",
138
+ "print(train_data)\n",
139
+ "train_text = train_data[\"comment_text\"]\n",
140
+ "train_labels = train_data[[\"toxic\", \"severe_toxic\", \n",
141
+ " \"obscene\", \"threat\", \n",
142
+ " \"insult\", \"identity_hate\"]]\n",
143
+ "\n",
144
+ "test_text = pd.read_csv(\"data/test.csv\")[\"comment_text\"]\n",
145
+ "test_labels = pd.read_csv(\"data/test_labels.csv\")[[\n",
146
+ " \"toxic\", \"severe_toxic\", \n",
147
+ " \"obscene\", \"threat\", \n",
148
+ " \"insult\", \"identity_hate\"]]\n",
149
+ "\n",
150
+ "# data preprocessing\n",
151
+ "\n",
152
+ "\n",
153
+ "\n",
154
+ "train_text = train_text.values.tolist()\n",
155
+ "train_labels = train_labels.values.tolist()\n",
156
+ "test_text = test_text.values.tolist()\n",
157
+ "test_labels = test_labels.values.tolist()\n"
158
+ ]
159
+ },
160
+ {
161
+ "cell_type": "code",
162
+ "execution_count": 10,
163
+ "id": "1n56TME9Njde",
164
+ "metadata": {
165
+ "id": "1n56TME9Njde"
166
+ },
167
+ "outputs": [],
168
+ "source": [
169
+ "# prepare tokenizer and dataset\n",
170
+ "\n",
171
+ "class TweetDataset(Dataset):\n",
172
+ " def __init__(self, encodings, labels):\n",
173
+ " self.encodings = encodings\n",
174
+ " self.labels = labels\n",
175
+ " self.tok = tokenizer\n",
176
+ " \n",
177
+ " def __getitem__(self, idx):\n",
178
+ "# print(idx)\n",
179
+ " print(len(self.labels))\n",
180
+ " encoding = self.tok(self.encodings.strings[idx], truncation=True, padding=\"max_length\", max_length=max_len).to(\"cuda:0\")\n",
181
+ " print(encoding.items())\n",
182
+ " item = { key: torch.tensor(val) for key, val in encoding.items() }\n",
183
+ " item['labels'] = torch.tensor(self.labels[idx])\n",
184
+ "# print(item)\n",
185
+ " return item\n",
186
+ " \n",
187
+ " def __len__(self):\n",
188
+ " return len(self.labels)\n",
189
+ "\n",
190
+ "# no tokenizer\n",
191
+ "class TweetDataset2(Dataset):\n",
192
+ " def __init__(self, encodings, labels):\n",
193
+ " self.encodings = encodings\n",
194
+ " self.labels = labels\n",
195
+ " self.tok = tokenizer\n",
196
+ " \n",
197
+ " def __getitem__(self, idx):\n",
198
+ "# print(idx)\n",
199
+ " print(len(self.labels))\n",
200
+ " encoding = self.tok(self.encodings.strings[idx], truncation=True, padding=\"max_length\", max_length=max_len).to(\"cuda:0\")\n",
201
+ " print(encoding.items())\n",
202
+ " item = { key: torch.tensor(val) for key, val in encoding.items() }\n",
203
+ " item['labels'] = torch.tensor(self.labels[idx])\n",
204
+ "# print(item)\n",
205
+ " return item\n",
206
+ " \n",
207
+ " def __len__(self):\n",
208
+ " return len(self.labels)\n",
209
+ "\n",
210
+ "\n",
211
+ "\n",
212
+ "\n",
213
+ "train_strings = TokenizerDataset(train_text)\n",
214
+ "test_strings = TokenizerDataset(test_text)\n",
215
+ "\n",
216
+ "train_dataloader = DataLoader(train_strings, batch_size=16, shuffle=True)\n",
217
+ "test_dataloader = DataLoader(test_strings, batch_size=16, shuffle=True)\n",
218
+ "\n",
219
+ "\n",
220
+ "\n",
221
+ "\n",
222
+ "train_encodings = tokenizer.batch_encode_plus(train_text, \\\n",
223
+ " max_length=200, pad_to_max_length=True, \\\n",
224
+ " truncation=True, return_token_type_ids=False, return_tensors='pt' \\\n",
225
+ " ).to(\"cuda:0\")\n",
226
+ "test_encodings = tokenizer.batch_encode_plus(test_text, \\\n",
227
+ " max_length=200, pad_to_max_length=True, \\\n",
228
+ " truncation=True, return_token_type_ids=False, return_tensors='pt' \\\n",
229
+ " ).to(\"cuda:0\")\n",
230
+ "\n",
231
+ "# train_encodings = tokenizer(train_text, truncation=True, padding=True)\n",
232
+ "# test_encodings = tokenizer(test_text, truncation=True, padding=True)"
233
+ ]
234
+ },
235
+ {
236
+ "cell_type": "code",
237
+ "execution_count": 15,
238
+ "id": "4kwydz67qjW9",
239
+ "metadata": {
240
+ "colab": {
241
+ "base_uri": "https://localhost:8080/"
242
+ },
243
+ "id": "4kwydz67qjW9",
244
+ "outputId": "1653744e-69cf-46f8-a2d1-ffc3a3a4d58a"
245
+ },
246
+ "outputs": [
247
+ {
248
+ "name": "stdout",
249
+ "output_type": "stream",
250
+ "text": [
251
+ "159571\n",
252
+ "159571\n"
253
+ ]
254
+ }
255
+ ],
256
+ "source": [
257
+ "# no tokenizer\n",
258
+ "class TweetDataset3(Dataset):\n",
259
+ " def __init__(self, encodings, labels):\n",
260
+ " self.encodings = encodings\n",
261
+ " self.labels = labels\n",
262
+ " self.tok = tokenizer\n",
263
+ " \n",
264
+ " def __getitem__(self, idx):\n",
265
+ " print(idx)\n",
266
+ " item = { key: torch.tensor(val) for key, val in self.encodings.items() }\n",
267
+ " item['labels'] = torch.tensor(self.labels[idx])\n",
268
+ "# print(item)\n",
269
+ " return item\n",
270
+ " \n",
271
+ " def __len__(self):\n",
272
+ " return len(self.labels)\n",
273
+ "\n",
274
+ "\n",
275
+ "\n",
276
+ "train_dataset = TweetDataset3(train_encodings, train_labels)\n",
277
+ "test_dataset = TweetDataset3(test_encodings, test_labels)\n",
278
+ "\n",
279
+ "print(len(train_dataset.labels))\n",
280
+ "print(len(train_strings))\n",
281
+ "\n",
282
+ "\n",
283
+ "class MultilabelTrainer(Trainer):\n",
284
+ " def compute_loss(self, model, inputs, return_outputs=False):\n",
285
+ " labels = inputs.pop(\"labels\")\n",
286
+ " outputs = model(**inputs)\n",
287
+ " logits = outputs.logits\n",
288
+ " loss_fct = torch.nn.BCEWithLogitsLoss()\n",
289
+ " loss = loss_fct(logits.view(-1, self.model.config.num_labels), \n",
290
+ " labels.float().view(-1, self.model.config.num_labels))\n",
291
+ " return (loss, outputs) if return_outputs else loss\n",
292
+ "\n",
293
+ "\n",
294
+ "# training\n",
295
+ "trainer = MultilabelTrainer(\n",
296
+ " model=model, \n",
297
+ " args=training_args, \n",
298
+ " train_dataset=train_dataset, \n",
299
+ " eval_dataset=test_dataset\n",
300
+ " )"
301
+ ]
302
+ },
303
+ {
304
+ "cell_type": "code",
305
+ "execution_count": null,
306
+ "id": "VwsyMZg_tgTg",
307
+ "metadata": {
308
+ "colab": {
309
+ "base_uri": "https://localhost:8080/",
310
+ "height": 1000
311
+ },
312
+ "id": "VwsyMZg_tgTg",
313
+ "outputId": "6cf8f3aa-629e-4650-9bbd-dfeb11071ef7"
314
+ },
315
+ "outputs": [],
316
+ "source": [
317
+ "trainer.train()"
318
+ ]
319
+ }
320
+ ],
321
+ "metadata": {
322
+ "colab": {
323
+ "provenance": []
324
+ },
325
+ "kernelspec": {
326
+ "display_name": "Python 3 (ipykernel)",
327
+ "language": "python",
328
+ "name": "python3"
329
+ },
330
+ "language_info": {
331
+ "codemirror_mode": {
332
+ "name": "ipython",
333
+ "version": 3
334
+ },
335
+ "file_extension": ".py",
336
+ "mimetype": "text/x-python",
337
+ "name": "python",
338
+ "nbconvert_exporter": "python",
339
+ "pygments_lexer": "ipython3",
340
+ "version": "3.10.6"
341
+ }
342
+ },
343
+ "nbformat": 4,
344
+ "nbformat_minor": 5
345
+ }
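Note: the TweetDataset3 class in this notebook tensorizes the entire encodings batch inside __getitem__ rather than selecting row idx. The per-sample indexing pattern from the standard Hugging Face fine-tuning recipe is sketched below; this is illustrative only, not part of the commit, and it assumes the tokenizer, train_text, and train_labels objects built in the notebook above (ToxicCommentDataset is a hypothetical name).

import torch
from torch.utils.data import Dataset

# Sketch of a per-sample dataset (not from the commit above).
class ToxicCommentDataset(Dataset):
    def __init__(self, texts, labels):
        # Tokenize once up front; plain Python lists keep per-item indexing cheap.
        self.encodings = tokenizer(texts, truncation=True,
                                   padding="max_length", max_length=200)
        self.labels = labels

    def __getitem__(self, idx):
        # Select row `idx` of every field (input_ids, attention_mask, ...).
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item["labels"] = torch.tensor(self.labels[idx], dtype=torch.float)
        return item

    def __len__(self):
        return len(self.labels)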
logs/1682300361.4426298/events.out.tfevents.1682300361.mint.371280.1 ADDED
Binary file (5.8 kB). View file
 
logs/1682300884.6095285/events.out.tfevents.1682300884.mint.371280.3 ADDED
Binary file (5.8 kB). View file
 
logs/1682300938.1223385/events.out.tfevents.1682300938.mint.371280.5 ADDED
Binary file (5.8 kB). View file
 
logs/1682301013.2686887/events.out.tfevents.1682301013.mint.371280.7 ADDED
Binary file (5.8 kB). View file
 
logs/events.out.tfevents.1682300361.mint.371280.0 ADDED
Binary file (4.19 kB). View file
 
logs/events.out.tfevents.1682300884.mint.371280.2 ADDED
Binary file (4.19 kB). View file
 
logs/events.out.tfevents.1682300938.mint.371280.4 ADDED
Binary file (4.19 kB). View file
 
logs/events.out.tfevents.1682301013.mint.371280.6 ADDED
Binary file (4.19 kB). View file
 
train.py CHANGED
@@ -6,11 +6,11 @@ import pandas as pd
6
  from transformers import BertTokenizerFast, BertForSequenceClassification
7
  from transformers import Trainer, TrainingArguments
8
 
9
-
10
 
11
  model_name = "bert-base-uncased"
12
  tokenizer = BertTokenizerFast.from_pretrained(model_name)
13
- model = BertForSequenceClassification.from_pretrained(model_name, num_labels=6)
14
  max_len = 200
15
 
16
  training_args = TrainingArguments(
@@ -26,20 +26,7 @@ training_args = TrainingArguments(
26
  )
27
 
28
  # dataset class that inherits from torch.utils.data.Dataset
29
- class TweetDataset(Dataset):
30
- def __init__(self, encodings, labels):
31
- self.encodings = encodings
32
- self.labels = labels
33
- self.tok = tokenizer
34
-
35
- def __getitem__(self, idx):
36
- # encoding = self.tok(self.encodings[idx], truncation=True, padding="max_length", max_length=max_len)
37
- item = { key: torch.tensor(val[idx]) for key, val in self.encoding.items() }
38
- item['labels'] = torch.tensor(self.labels[idx])
39
- return item
40
-
41
- def __len__(self):
42
- return len(self.labels)
43
 
44
  class TokenizerDataset(Dataset):
45
  def __init__(self, strings):
@@ -52,10 +39,8 @@ class TokenizerDataset(Dataset):
52
  return len(self.strings)
53
 
54
 
55
-
56
-
57
-
58
  train_data = pd.read_csv("data/train.csv")
 
59
  train_text = train_data["comment_text"]
60
  train_labels = train_data[["toxic", "severe_toxic",
61
  "obscene", "threat",
@@ -77,9 +62,31 @@ test_text = test_text.values.tolist()
77
  test_labels = test_labels.values.tolist()
78
 
79
 
80
 
81
 
82
- # prepare tokenizer and dataset
83
 
84
  train_strings = TokenizerDataset(train_text)
85
  test_strings = TokenizerDataset(test_text)
@@ -99,45 +106,33 @@ test_dataloader = DataLoader(test_strings, batch_size=16, shuffle=True)
99
  # truncation=True, return_token_type_ids=False \
100
  # )
101
 
 
 
102
 
103
- train_encodings = tokenizer.encode(train_text, truncation=True, padding=True)
104
- test_encodings = tokenizer.encode(test_text, truncation=True, padding=True)
105
-
106
-
107
- f = open("traintokens.txt", 'a')
108
- f.write(train_encodings)
109
- f.write('\n\n\n\n\n')
110
- f.close()
111
-
112
- g = open("testtokens.txt", 'a')
113
- g.write(test_encodings)
114
- g.write('\n\n\n\n\n')
115
-
116
- g.close()
117
-
118
-
119
-
120
- # train_dataset = TweetDataset(train_encodings, train_labels)
121
- # test_dataset = TweetDataset(test_encodings, test_labels)
122
-
123
-
124
-
125
-
126
-
127
- # # training
128
- # trainer = Trainer(
129
- # model=model,
130
- # args=training_args,
131
- # train_dataset=train_dataset,
132
- # eval_dataset=test_dataset
133
- # )
134
-
135
-
136
- # trainer.train()
137
-
138
 
 
 
139
 
140
 
141
 
142
 
143
 
 
 
6
  from transformers import BertTokenizerFast, BertForSequenceClassification
7
  from transformers import Trainer, TrainingArguments
8
 
9
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
10
 
11
  model_name = "bert-base-uncased"
12
  tokenizer = BertTokenizerFast.from_pretrained(model_name)
13
+ model = BertForSequenceClassification.from_pretrained(model_name, num_labels=6).to(device)
14
  max_len = 200
15
 
16
  training_args = TrainingArguments(
 
26
  )
27
 
28
  # dataset class that inherits from torch.utils.data.Dataset
29
+
30
 
31
  class TokenizerDataset(Dataset):
32
  def __init__(self, strings):
 
39
  return len(self.strings)
40
 
41
 
42
  train_data = pd.read_csv("data/train.csv")
43
+ print(train_data)
44
  train_text = train_data["comment_text"]
45
  train_labels = train_data[["toxic", "severe_toxic",
46
  "obscene", "threat",
 
62
  test_labels = test_labels.values.tolist()
63
 
64
 
65
+ # prepare tokenizer and dataset
66
+
67
+ class TweetDataset(Dataset):
68
+ def __init__(self, encodings, labels):
69
+ self.encodings = encodings
70
+ self.labels = labels
71
+ self.tok = tokenizer
72
+
73
+ def __getitem__(self, idx):
74
+ print(idx)
75
+ # print(len(self.labels))
76
+ encoding = self.tok(self.encodings.strings[idx], truncation=True,
77
+ padding="max_length", max_length=max_len)
78
+ # print(encoding.items())
79
+ item = { key: torch.tensor(val) for key, val in encoding.items() }
80
+ item['labels'] = torch.tensor(self.labels[idx])
81
+ # print(item)
82
+ return item
83
+
84
+ def __len__(self):
85
+ return len(self.labels)
86
+
87
+
88
 
89
 
 
90
 
91
  train_strings = TokenizerDataset(train_text)
92
  test_strings = TokenizerDataset(test_text)
 
106
  # truncation=True, return_token_type_ids=False \
107
  # )
108
 
109
+ # train_encodings = tokenizer(train_text, truncation=True, padding=True)
110
+ # test_encodings = tokenizer(test_text, truncation=True, padding=True)
111
 
112
+ train_dataset = TweetDataset(train_strings, train_labels)
113
+ test_dataset = TweetDataset(test_strings, test_labels)
114
 
115
+ print(len(train_dataset.labels))
116
+ print(len(train_strings))
117
 
118
 
119
+ class MultilabelTrainer(Trainer):
120
+ def compute_loss(self, model, inputs, return_outputs=False):
121
+ labels = inputs.pop("labels")
122
+ outputs = model(**inputs)
123
+ logits = outputs.logits
124
+ loss_fct = torch.nn.BCEWithLogitsLoss()
125
+ loss = loss_fct(logits.view(-1, self.model.config.num_labels),
126
+ labels.float().view(-1, self.model.config.num_labels))
127
+ return (loss, outputs) if return_outputs else loss
128
 
129
 
130
+ # training
131
+ trainer = MultilabelTrainer(
132
+ model=model,
133
+ args=training_args,
134
+ train_dataset=train_dataset,
135
+ eval_dataset=test_dataset
136
+ )
137
 
138
+ trainer.train()
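Note: the MultilabelTrainer added in this change swaps the default cross-entropy for BCEWithLogitsLoss so each of the six label columns is scored as an independent binary target. A minimal standalone sketch of that loss (illustrative values, not from the diff) shows the shapes compute_loss works with:

import torch

# (batch_size, num_labels) raw logits and 0/1 multilabel targets.
logits = torch.randn(16, 6)
labels = torch.randint(0, 2, (16, 6))

loss_fct = torch.nn.BCEWithLogitsLoss()
loss = loss_fct(logits.view(-1, 6), labels.float().view(-1, 6))
print(loss.item())  # scalar loss averaged over all 16 * 6 label decisions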
working_training.ipynb ADDED
@@ -0,0 +1,601 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "215a1aae",
7
+ "metadata": {
8
+ "id": "215a1aae"
9
+ },
10
+ "outputs": [],
11
+ "source": [
12
+ "import torch\n",
13
+ "from torch.utils.data import Dataset, DataLoader\n",
14
+ "\n",
15
+ "# import torch_xla\n",
16
+ "# import torch_xla.core.xla_model as xm\n",
17
+ "\n",
18
+ "import pandas as pd\n",
19
+ "\n",
20
+ "from transformers import BertTokenizerFast, BertForSequenceClassification\n",
21
+ "from transformers import Trainer, TrainingArguments"
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "source": [
27
+ "device = \"cuda:0\"\n",
28
+ "\n",
29
+ "model_name = \"bert-base-uncased\"\n",
30
+ "tokenizer = BertTokenizerFast.from_pretrained(model_name)\n",
31
+ "model = BertForSequenceClassification.from_pretrained(model_name, num_labels=6).to(device)\n",
32
+ "max_len = 200\n",
33
+ "\n",
34
+ "training_args = TrainingArguments(\n",
35
+ " output_dir=\"results\",\n",
36
+ " num_train_epochs=1,\n",
37
+ " per_device_train_batch_size=16,\n",
38
+ " per_device_eval_batch_size=64,\n",
39
+ " warmup_steps=500,\n",
40
+ " learning_rate=5e-5,\n",
41
+ " weight_decay=0.01,\n",
42
+ " logging_dir=\"./logs\",\n",
43
+ " logging_steps=10\n",
44
+ " )\n",
45
+ "\n",
46
+ "# dataset class that inherits from torch.utils.data.Dataset\n",
47
+ "\n",
48
+ " \n",
49
+ "class TokenizerDataset(Dataset):\n",
50
+ " def __init__(self, strings):\n",
51
+ " self.strings = strings\n",
52
+ " \n",
53
+ " def __getitem__(self, idx):\n",
54
+ " return self.strings[idx]\n",
55
+ " \n",
56
+ " def __len__(self):\n",
57
+ " return len(self.strings)\n",
58
+ " "
59
+ ],
60
+ "metadata": {
61
+ "id": "J5Tlgp4tNd0U",
62
+ "outputId": "5d45330f-ec42-4766-8bf6-85ba08af7c3b",
63
+ "colab": {
64
+ "base_uri": "https://localhost:8080/"
65
+ }
66
+ },
67
+ "id": "J5Tlgp4tNd0U",
68
+ "execution_count": null,
69
+ "outputs": [
70
+ {
71
+ "output_type": "stream",
72
+ "name": "stderr",
73
+ "text": [
74
+ "Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']\n",
75
+ "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
76
+ "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
77
+ "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']\n",
78
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
79
+ ]
80
+ }
81
+ ]
82
+ },
83
+ {
84
+ "cell_type": "code",
85
+ "execution_count": null,
86
+ "id": "9969c58c",
87
+ "metadata": {
88
+ "scrolled": false,
89
+ "id": "9969c58c",
90
+ "colab": {
91
+ "base_uri": "https://localhost:8080/"
92
+ },
93
+ "outputId": "cc7363d4-0ad4-4b58-baae-72efe63c7aad"
94
+ },
95
+ "outputs": [
96
+ {
97
+ "output_type": "stream",
98
+ "name": "stdout",
99
+ "text": [
100
+ " id comment_text \\\n",
101
+ "0 0000997932d777bf Explanation\\nWhy the edits made under my usern... \n",
102
+ "1 000103f0d9cfb60f D'aww! He matches this background colour I'm s... \n",
103
+ "2 000113f07ec002fd Hey man, I'm really not trying to edit war. It... \n",
104
+ "3 0001b41b1c6bb37e \"\\nMore\\nI can't make any real suggestions on ... \n",
105
+ "4 0001d958c54c6e35 You, sir, are my hero. Any chance you remember... \n",
106
+ "... ... ... \n",
107
+ "159566 ffe987279560d7ff \":::::And for the second time of asking, when ... \n",
108
+ "159567 ffea4adeee384e90 You should be ashamed of yourself \\n\\nThat is ... \n",
109
+ "159568 ffee36eab5c267c9 Spitzer \\n\\nUmm, theres no actual article for ... \n",
110
+ "159569 fff125370e4aaaf3 And it looks like it was actually you who put ... \n",
111
+ "159570 fff46fc426af1f9a \"\\nAnd ... I really don't think you understand... \n",
112
+ "\n",
113
+ " toxic severe_toxic obscene threat insult identity_hate \n",
114
+ "0 0 0 0 0 0 0 \n",
115
+ "1 0 0 0 0 0 0 \n",
116
+ "2 0 0 0 0 0 0 \n",
117
+ "3 0 0 0 0 0 0 \n",
118
+ "4 0 0 0 0 0 0 \n",
119
+ "... ... ... ... ... ... ... \n",
120
+ "159566 0 0 0 0 0 0 \n",
121
+ "159567 0 0 0 0 0 0 \n",
122
+ "159568 0 0 0 0 0 0 \n",
123
+ "159569 0 0 0 0 0 0 \n",
124
+ "159570 0 0 0 0 0 0 \n",
125
+ "\n",
126
+ "[159571 rows x 8 columns]\n"
127
+ ]
128
+ }
129
+ ],
130
+ "source": [
131
+ "train_data = pd.read_csv(\"data/train.csv\")\n",
132
+ "print(train_data)\n",
133
+ "train_text = train_data[\"comment_text\"]\n",
134
+ "train_labels = train_data[[\"toxic\", \"severe_toxic\", \n",
135
+ " \"obscene\", \"threat\", \n",
136
+ " \"insult\", \"identity_hate\"]]\n",
137
+ "\n",
138
+ "test_text = pd.read_csv(\"data/test.csv\")[\"comment_text\"]\n",
139
+ "test_labels = pd.read_csv(\"data/test_labels.csv\")[[\n",
140
+ " \"toxic\", \"severe_toxic\", \n",
141
+ " \"obscene\", \"threat\", \n",
142
+ " \"insult\", \"identity_hate\"]]\n",
143
+ "\n",
144
+ "# data preprocessing\n",
145
+ "\n",
146
+ "\n",
147
+ "\n",
148
+ "train_text = train_text.values.tolist()\n",
149
+ "train_labels = train_labels.values.tolist()\n",
150
+ "test_text = test_text.values.tolist()\n",
151
+ "test_labels = test_labels.values.tolist()\n"
152
+ ]
153
+ },
154
+ {
155
+ "cell_type": "code",
156
+ "source": [
157
+ "# prepare tokenizer and dataset\n",
158
+ "\n",
159
+ "class TweetDataset(Dataset):\n",
160
+ " def __init__(self, encodings, labels):\n",
161
+ " self.encodings = encodings\n",
162
+ " self.labels = labels\n",
163
+ " self.tok = tokenizer\n",
164
+ " \n",
165
+ " def __getitem__(self, idx):\n",
166
+ " # print(idx)\n",
167
+ " # print(len(self.labels))\n",
168
+ " encoding = self.tok(self.encodings.strings[idx], truncation=True, \n",
169
+ " padding=\"max_length\", max_length=max_len)\n",
170
+ " # print(encoding.items())\n",
171
+ " item = { key: torch.tensor(val) for key, val in encoding.items() }\n",
172
+ " item['labels'] = torch.tensor(self.labels[idx])\n",
173
+ " # print(item)\n",
174
+ " return item\n",
175
+ " \n",
176
+ " def __len__(self):\n",
177
+ " return len(self.labels)\n",
178
+ "\n",
179
+ "\n",
180
+ "\n",
181
+ "\n",
182
+ "\n",
183
+ "train_strings = TokenizerDataset(train_text)\n",
184
+ "test_strings = TokenizerDataset(test_text)\n",
185
+ "\n",
186
+ "train_dataloader = DataLoader(train_strings, batch_size=16, shuffle=True)\n",
187
+ "test_dataloader = DataLoader(test_strings, batch_size=16, shuffle=True)\n",
188
+ "\n",
189
+ "\n",
190
+ "\n",
191
+ "\n",
192
+ "# train_encodings = tokenizer.batch_encode_plus(train_text, \\\n",
193
+ "# max_length=200, pad_to_max_length=True, \\\n",
194
+ "# truncation=True, return_token_type_ids=False)\n",
195
+ "# # return_tensors='pt')\n",
196
+ "# test_encodings = tokenizer.batch_encode_plus(test_text, \\\n",
197
+ "# max_length=200, pad_to_max_length=True, \\\n",
198
+ "# truncation=True, return_token_type_ids=False)\n",
199
+ "# # return_tensors='pt')\n",
200
+ "\n",
201
+ "# train_encodings = tokenizer(train_text, truncation=True, padding=True)\n",
202
+ "# test_encodings = tokenizer(test_text, truncation=True, padding=True)"
203
+ ],
204
+ "metadata": {
205
+ "id": "1n56TME9Njde"
206
+ },
207
+ "id": "1n56TME9Njde",
208
+ "execution_count": null,
209
+ "outputs": []
210
+ },
211
+ {
212
+ "cell_type": "code",
213
+ "source": [
214
+ "train_dataset = TweetDataset(train_strings, train_labels)\n",
215
+ "test_dataset = TweetDataset(test_strings, test_labels)\n",
216
+ "\n",
217
+ "print(len(train_dataset.labels))\n",
218
+ "print(len(train_strings))\n",
219
+ "\n",
220
+ "\n",
221
+ "class MultilabelTrainer(Trainer):\n",
222
+ " def compute_loss(self, model, inputs, return_outputs=False):\n",
223
+ " labels = inputs.pop(\"labels\")\n",
224
+ " outputs = model(**inputs)\n",
225
+ " logits = outputs.logits\n",
226
+ " loss_fct = torch.nn.BCEWithLogitsLoss()\n",
227
+ " loss = loss_fct(logits.view(-1, self.model.config.num_labels), \n",
228
+ " labels.float().view(-1, self.model.config.num_labels))\n",
229
+ " return (loss, outputs) if return_outputs else loss\n",
230
+ "\n",
231
+ "\n",
232
+ "# training\n",
233
+ "trainer = MultilabelTrainer(\n",
234
+ " model=model, \n",
235
+ " args=training_args, \n",
236
+ " train_dataset=train_dataset, \n",
237
+ " eval_dataset=test_dataset\n",
238
+ " )"
239
+ ],
240
+ "metadata": {
241
+ "id": "4kwydz67qjW9",
242
+ "colab": {
243
+ "base_uri": "https://localhost:8080/"
244
+ },
245
+ "outputId": "8405ba5b-6ef8-4bb1-87c0-637510e11cdc"
246
+ },
247
+ "id": "4kwydz67qjW9",
248
+ "execution_count": null,
249
+ "outputs": [
250
+ {
251
+ "output_type": "stream",
252
+ "name": "stdout",
253
+ "text": [
254
+ "159571\n",
255
+ "159571\n"
256
+ ]
257
+ }
258
+ ]
259
+ },
260
+ {
261
+ "cell_type": "code",
262
+ "source": [
263
+ "trainer.train()"
264
+ ],
265
+ "metadata": {
266
+ "id": "VwsyMZg_tgTg",
267
+ "outputId": "2153bf25-56d5-4b1f-a24a-8e2f4731638e",
268
+ "colab": {
269
+ "base_uri": "https://localhost:8080/",
270
+ "height": 1000
271
+ }
272
+ },
273
+ "id": "VwsyMZg_tgTg",
274
+ "execution_count": null,
275
+ "outputs": [
276
+ {
277
+ "output_type": "stream",
278
+ "name": "stderr",
279
+ "text": [
280
+ "/usr/local/lib/python3.9/dist-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
281
+ " warnings.warn(\n"
282
+ ]
283
+ },
284
+ {
285
+ "output_type": "display_data",
286
+ "data": {
287
+ "text/plain": [
288
+ "<IPython.core.display.HTML object>"
289
+ ],
290
+ "text/html": [
291
+ "\n",
292
+ " <div>\n",
293
+ " \n",
294
+ " <progress value='582' max='9974' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
295
+ " [ 582/9974 05:37 < 1:30:57, 1.72 it/s, Epoch 0.06/1]\n",
296
+ " </div>\n",
297
+ " <table border=\"1\" class=\"dataframe\">\n",
298
+ " <thead>\n",
299
+ " <tr style=\"text-align: left;\">\n",
300
+ " <th>Step</th>\n",
301
+ " <th>Training Loss</th>\n",
302
+ " </tr>\n",
303
+ " </thead>\n",
304
+ " <tbody>\n",
305
+ " <tr>\n",
306
+ " <td>10</td>\n",
307
+ " <td>0.695800</td>\n",
308
+ " </tr>\n",
309
+ " <tr>\n",
310
+ " <td>20</td>\n",
311
+ " <td>0.674200</td>\n",
312
+ " </tr>\n",
313
+ " <tr>\n",
314
+ " <td>30</td>\n",
315
+ " <td>0.631900</td>\n",
316
+ " </tr>\n",
317
+ " <tr>\n",
318
+ " <td>40</td>\n",
319
+ " <td>0.570600</td>\n",
320
+ " </tr>\n",
321
+ " <tr>\n",
322
+ " <td>50</td>\n",
323
+ " <td>0.541100</td>\n",
324
+ " </tr>\n",
325
+ " <tr>\n",
326
+ " <td>60</td>\n",
327
+ " <td>0.500300</td>\n",
328
+ " </tr>\n",
329
+ " <tr>\n",
330
+ " <td>70</td>\n",
331
+ " <td>0.440800</td>\n",
332
+ " </tr>\n",
333
+ " <tr>\n",
334
+ " <td>80</td>\n",
335
+ " <td>0.405400</td>\n",
336
+ " </tr>\n",
337
+ " <tr>\n",
338
+ " <td>90</td>\n",
339
+ " <td>0.336200</td>\n",
340
+ " </tr>\n",
341
+ " <tr>\n",
342
+ " <td>100</td>\n",
343
+ " <td>0.285000</td>\n",
344
+ " </tr>\n",
345
+ " <tr>\n",
346
+ " <td>110</td>\n",
347
+ " <td>0.232400</td>\n",
348
+ " </tr>\n",
349
+ " <tr>\n",
350
+ " <td>120</td>\n",
351
+ " <td>0.239500</td>\n",
352
+ " </tr>\n",
353
+ " <tr>\n",
354
+ " <td>130</td>\n",
355
+ " <td>0.197300</td>\n",
356
+ " </tr>\n",
357
+ " <tr>\n",
358
+ " <td>140</td>\n",
359
+ " <td>0.196700</td>\n",
360
+ " </tr>\n",
361
+ " <tr>\n",
362
+ " <td>150</td>\n",
363
+ " <td>0.143900</td>\n",
364
+ " </tr>\n",
365
+ " <tr>\n",
366
+ " <td>160</td>\n",
367
+ " <td>0.153700</td>\n",
368
+ " </tr>\n",
369
+ " <tr>\n",
370
+ " <td>170</td>\n",
371
+ " <td>0.098200</td>\n",
372
+ " </tr>\n",
373
+ " <tr>\n",
374
+ " <td>180</td>\n",
375
+ " <td>0.129700</td>\n",
376
+ " </tr>\n",
377
+ " <tr>\n",
378
+ " <td>190</td>\n",
379
+ " <td>0.094500</td>\n",
380
+ " </tr>\n",
381
+ " <tr>\n",
382
+ " <td>200</td>\n",
383
+ " <td>0.104400</td>\n",
384
+ " </tr>\n",
385
+ " <tr>\n",
386
+ " <td>210</td>\n",
387
+ " <td>0.119000</td>\n",
388
+ " </tr>\n",
389
+ " <tr>\n",
390
+ " <td>220</td>\n",
391
+ " <td>0.081700</td>\n",
392
+ " </tr>\n",
393
+ " <tr>\n",
394
+ " <td>230</td>\n",
395
+ " <td>0.081800</td>\n",
396
+ " </tr>\n",
397
+ " <tr>\n",
398
+ " <td>240</td>\n",
399
+ " <td>0.079700</td>\n",
400
+ " </tr>\n",
401
+ " <tr>\n",
402
+ " <td>250</td>\n",
403
+ " <td>0.077800</td>\n",
404
+ " </tr>\n",
405
+ " <tr>\n",
406
+ " <td>260</td>\n",
407
+ " <td>0.093200</td>\n",
408
+ " </tr>\n",
409
+ " <tr>\n",
410
+ " <td>270</td>\n",
411
+ " <td>0.066400</td>\n",
412
+ " </tr>\n",
413
+ " <tr>\n",
414
+ " <td>280</td>\n",
415
+ " <td>0.064000</td>\n",
416
+ " </tr>\n",
417
+ " <tr>\n",
418
+ " <td>290</td>\n",
419
+ " <td>0.074000</td>\n",
420
+ " </tr>\n",
421
+ " <tr>\n",
422
+ " <td>300</td>\n",
423
+ " <td>0.084200</td>\n",
424
+ " </tr>\n",
425
+ " <tr>\n",
426
+ " <td>310</td>\n",
427
+ " <td>0.064300</td>\n",
428
+ " </tr>\n",
429
+ " <tr>\n",
430
+ " <td>320</td>\n",
431
+ " <td>0.082100</td>\n",
432
+ " </tr>\n",
433
+ " <tr>\n",
434
+ " <td>330</td>\n",
435
+ " <td>0.057900</td>\n",
436
+ " </tr>\n",
437
+ " <tr>\n",
438
+ " <td>340</td>\n",
439
+ " <td>0.065000</td>\n",
440
+ " </tr>\n",
441
+ " <tr>\n",
442
+ " <td>350</td>\n",
443
+ " <td>0.072900</td>\n",
444
+ " </tr>\n",
445
+ " <tr>\n",
446
+ " <td>360</td>\n",
447
+ " <td>0.064500</td>\n",
448
+ " </tr>\n",
449
+ " <tr>\n",
450
+ " <td>370</td>\n",
451
+ " <td>0.064300</td>\n",
452
+ " </tr>\n",
453
+ " <tr>\n",
454
+ " <td>380</td>\n",
455
+ " <td>0.071900</td>\n",
456
+ " </tr>\n",
457
+ " <tr>\n",
458
+ " <td>390</td>\n",
459
+ " <td>0.044600</td>\n",
460
+ " </tr>\n",
461
+ " <tr>\n",
462
+ " <td>400</td>\n",
463
+ " <td>0.059300</td>\n",
464
+ " </tr>\n",
465
+ " <tr>\n",
466
+ " <td>410</td>\n",
467
+ " <td>0.063000</td>\n",
468
+ " </tr>\n",
469
+ " <tr>\n",
470
+ " <td>420</td>\n",
471
+ " <td>0.082400</td>\n",
472
+ " </tr>\n",
473
+ " <tr>\n",
474
+ " <td>430</td>\n",
475
+ " <td>0.070100</td>\n",
476
+ " </tr>\n",
477
+ " <tr>\n",
478
+ " <td>440</td>\n",
479
+ " <td>0.042700</td>\n",
480
+ " </tr>\n",
481
+ " <tr>\n",
482
+ " <td>450</td>\n",
483
+ " <td>0.089500</td>\n",
484
+ " </tr>\n",
485
+ " <tr>\n",
486
+ " <td>460</td>\n",
487
+ " <td>0.061400</td>\n",
488
+ " </tr>\n",
489
+ " <tr>\n",
490
+ " <td>470</td>\n",
491
+ " <td>0.097300</td>\n",
492
+ " </tr>\n",
493
+ " <tr>\n",
494
+ " <td>480</td>\n",
495
+ " <td>0.062700</td>\n",
496
+ " </tr>\n",
497
+ " <tr>\n",
498
+ " <td>490</td>\n",
499
+ " <td>0.067800</td>\n",
500
+ " </tr>\n",
501
+ " <tr>\n",
502
+ " <td>500</td>\n",
503
+ " <td>0.083300</td>\n",
504
+ " </tr>\n",
505
+ " <tr>\n",
506
+ " <td>510</td>\n",
507
+ " <td>0.083500</td>\n",
508
+ " </tr>\n",
509
+ " <tr>\n",
510
+ " <td>520</td>\n",
511
+ " <td>0.053300</td>\n",
512
+ " </tr>\n",
513
+ " <tr>\n",
514
+ " <td>530</td>\n",
515
+ " <td>0.045400</td>\n",
516
+ " </tr>\n",
517
+ " <tr>\n",
518
+ " <td>540</td>\n",
519
+ " <td>0.052300</td>\n",
520
+ " </tr>\n",
521
+ " <tr>\n",
522
+ " <td>550</td>\n",
523
+ " <td>0.075300</td>\n",
524
+ " </tr>\n",
525
+ " <tr>\n",
526
+ " <td>560</td>\n",
527
+ " <td>0.069000</td>\n",
528
+ " </tr>\n",
529
+ " <tr>\n",
530
+ " <td>570</td>\n",
531
+ " <td>0.084800</td>\n",
532
+ " </tr>\n",
533
+ " <tr>\n",
534
+ " <td>580</td>\n",
535
+ " <td>0.028800</td>\n",
536
+ " </tr>\n",
537
+ " </tbody>\n",
538
+ "</table><p>"
539
+ ]
540
+ },
541
+ "metadata": {}
542
+ },
543
+ {
544
+ "output_type": "error",
545
+ "ename": "KeyboardInterrupt",
546
+ "evalue": "ignored",
547
+ "traceback": [
548
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
549
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
550
+ "\u001b[0;32m<ipython-input-6-3435b262f1ae>\u001b[0m in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtrainer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
551
+ "\u001b[0;32m/usr/local/lib/python3.9/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 1660\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_inner_training_loop\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_train_batch_size\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mauto_find_batch_size\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1661\u001b[0m )\n\u001b[0;32m-> 1662\u001b[0;31m return inner_training_loop(\n\u001b[0m\u001b[1;32m 1663\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1664\u001b[0m \u001b[0mresume_from_checkpoint\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mresume_from_checkpoint\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
552
+ "\u001b[0;32m/usr/local/lib/python3.9/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36m_inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 1927\u001b[0m \u001b[0mtr_loss_step\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtraining_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1928\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1929\u001b[0;31m \u001b[0mtr_loss_step\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtraining_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1930\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1931\u001b[0m if (\n",
553
+ "\u001b[0;32m/usr/local/lib/python3.9/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36mtraining_step\u001b[0;34m(self, model, inputs)\u001b[0m\n\u001b[1;32m 2715\u001b[0m \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdeepspeed\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mloss\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2716\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2717\u001b[0;31m \u001b[0mloss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2718\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2719\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mloss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdetach\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
554
+ "\u001b[0;32m/usr/local/lib/python3.9/dist-packages/torch/_tensor.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(self, gradient, retain_graph, create_graph, inputs)\u001b[0m\n\u001b[1;32m 485\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 486\u001b[0m )\n\u001b[0;32m--> 487\u001b[0;31m torch.autograd.backward(\n\u001b[0m\u001b[1;32m 488\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgradient\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 489\u001b[0m )\n",
555
+ "\u001b[0;32m/usr/local/lib/python3.9/dist-packages/torch/autograd/__init__.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001b[0m\n\u001b[1;32m 198\u001b[0m \u001b[0;31m# some Python versions print out the first line of a multi-line function\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 199\u001b[0m \u001b[0;31m# calls in the traceback and some print out the last line\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 200\u001b[0;31m Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n\u001b[0m\u001b[1;32m 201\u001b[0m \u001b[0mtensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgrad_tensors_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 202\u001b[0m allow_unreachable=True, accumulate_grad=True) # Calls into the C++ engine to run the backward pass\n",
556
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
557
+ ]
558
+ }
559
+ ]
560
+ },
561
+ {
562
+ "cell_type": "code",
563
+ "source": [
564
+ "!nvidia-smi"
565
+ ],
566
+ "metadata": {
567
+ "id": "EJPePRRQG1QK"
568
+ },
569
+ "id": "EJPePRRQG1QK",
570
+ "execution_count": null,
571
+ "outputs": []
572
+ }
573
+ ],
574
+ "metadata": {
575
+ "kernelspec": {
576
+ "display_name": "Python 3 (ipykernel)",
577
+ "language": "python",
578
+ "name": "python3"
579
+ },
580
+ "language_info": {
581
+ "codemirror_mode": {
582
+ "name": "ipython",
583
+ "version": 3
584
+ },
585
+ "file_extension": ".py",
586
+ "mimetype": "text/x-python",
587
+ "name": "python",
588
+ "nbconvert_exporter": "python",
589
+ "pygments_lexer": "ipython3",
590
+ "version": "3.10.6"
591
+ },
592
+ "colab": {
593
+ "provenance": [],
594
+ "gpuType": "T4"
595
+ },
596
+ "accelerator": "GPU",
597
+ "gpuClass": "standard"
598
+ },
599
+ "nbformat": 4,
600
+ "nbformat_minor": 5
601
+ }
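Note: the last run in this notebook was stopped with a KeyboardInterrupt around step 582 of 9974. A hedged follow-up sketch, not part of this commit: persist the partially fine-tuned model and, if the Trainer has written checkpoints under output_dir="results", resume from the latest one.

trainer.save_model("results/final")          # write model weights and config
tokenizer.save_pretrained("results/final")   # keep the matching tokenizer next to them

# Resume from the newest checkpoint in output_dir, if any were saved.
trainer.train(resume_from_checkpoint=True)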