{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Processing data" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "import torch\n", "from torch.utils.data import DataLoader\n", "from transformers import get_scheduler, TrainingArguments, Trainer, DataCollatorWithPadding, AdamW, AutoTokenizer, AutoModelForSequenceClassification\n", "from datasets import load_dataset\n", "import gc\n", "import numpy as np\n", "from datasets import load_metric\n", "import random\n", "import os\n", "from tqdm.auto import tqdm" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "os.environ['CUDA_LAUNCH_BLOCKING'] = '1'" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# reset GPU memory\n", "gc.collect()\n", "torch.cuda.empty_cache()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "ename": "NameError", "evalue": "name 'AutoTokenizer' is not defined", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[0mcheckpoint\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m\"bert-base-uncased\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mtokenizer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mAutoTokenizer\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfrom_pretrained\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcheckpoint\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;31mNameError\u001b[0m: name 'AutoTokenizer' is not defined" ] } ], "source": [ "checkpoint = \"bert-base-uncased\"\n", "tokenizer = AutoTokenizer.from_pretrained(checkpoint)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight']\n", "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. 
{ "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight']\n", "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] } ], "source": [ "checkpoint = \"bert-base-uncased\"\n", "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n", "model = AutoModelForSequenceClassification.from_pretrained(checkpoint)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# one manual training step on a tiny hand-made batch\n", "sequences = [\n", "    \"I've been waiting for a HuggingFace course my whole life.\",\n", "    \"This course is amazing!\",\n", "]\n", "batch = tokenizer(sequences, padding=True, truncation=True, return_tensors=\"pt\")\n", "batch[\"labels\"] = torch.tensor([1, 1])\n", "optimizer = AdamW(model.parameters())\n", "loss = model(**batch).loss\n", "loss.backward()\n", "optimizer.step()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Reusing dataset glue (C:\\Users\\1seba\\.cache\\huggingface\\datasets\\glue\\mrpc\\1.0.0\\dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)\n" ] } ], "source": [ "raw_datasets = load_dataset(\"glue\", \"mrpc\")\n", "raw_train_dataset = raw_datasets['train']\n", "# print(raw_train_dataset.features)\n", "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n", "# Why can't we pass the two sentences in separate tokenizer calls? See the check below.\n", "# tokenized_sentences_1 = tokenizer(raw_train_dataset[15]['sentence1'])\n", "# tokenized_sentences_2 = tokenizer(raw_train_dataset[15]['sentence2'])\n", "# print(tokenizer.decode(tokenized_sentences_1.input_ids), tokenizer.decode(tokenized_sentences_2.input_ids))\n", "# inputs = tokenizer(raw_train_dataset[15]['sentence1'], raw_train_dataset[15]['sentence2'])\n", "# print(tokenizer.decode(inputs.input_ids))\n", "inputs = tokenizer(raw_train_dataset['sentence1'], raw_train_dataset['sentence2'], padding=True, truncation=True)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['input_ids', 'token_type_ids', 'attention_mask']" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(inputs.keys())" ] },
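{ "cell_type": "markdown", "metadata": {}, "source": [ "Answering the commented question above (an added check, using the `tokenizer` and `raw_train_dataset` already defined): two separate calls produce two independent encodings, while passing the sentences as a pair produces one `[CLS] s1 [SEP] s2 [SEP]` sequence whose `token_type_ids` tell BERT which segment each token belongs to." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "example = raw_train_dataset[15]\n", "separate_1 = tokenizer(example['sentence1'])\n", "separate_2 = tokenizer(example['sentence2'])\n", "together = tokenizer(example['sentence1'], example['sentence2'])\n", "# the paired encoding is one sequence, not a concatenation of two\n", "print(len(separate_1.input_ids) + len(separate_2.input_ids), len(together.input_ids))\n", "# 0 marks first-sentence tokens, 1 marks second-sentence tokens\n", "print(together['token_type_ids'])" ] },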
{ "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 4/4 [00:01<00:00, 3.69ba/s]\n", "100%|██████████| 1/1 [00:00<00:00, 16.42ba/s]\n", "100%|██████████| 2/2 [00:00<00:00, 6.22ba/s]\n" ] } ], "source": [ "def tokenize_function(example):\n", "    return tokenizer(example['sentence1'], example['sentence2'], truncation=True)\n", "tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "data_collator = DataCollatorWithPadding(tokenizer=tokenizer)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[50, 59, 47, 67, 59, 50, 62, 32]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "samples = tokenized_datasets[\"train\"][:8]\n", "samples = {k: v for k, v in samples.items() if k not in [\"idx\", \"sentence1\", \"sentence2\"]}\n", "[len(x) for x in samples[\"input_ids\"]]" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "{'attention_mask': torch.Size([8, 67]),\n", " 'input_ids': torch.Size([8, 67]),\n", " 'token_type_ids': torch.Size([8, 67]),\n", " 'labels': torch.Size([8])}" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "batch = data_collator(samples)\n", "{k: v.shape for k, v in batch.items()}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Challenge 1" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "samples = tokenized_datasets['test'][:8]\n", "samples = {k: samples[k] for k in list(samples.keys()) if k not in [\"idx\", \"sentence1\", \"sentence2\"]}" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "padded_samples = data_collator(samples)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "# the collator can't tensorize string columns, so drop them before batching\n", "test_dataset = tokenized_datasets['test'].remove_columns(['idx', 'sentence1', 'sentence2'])\n", "train_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=True, collate_fn=data_collator)\n", "for batch in train_dataloader:\n", "    print(batch['input_ids'].shape)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Challenge 2" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Reusing dataset glue (C:\\Users\\1seba\\.cache\\huggingface\\datasets\\glue\\sst2\\1.0.0\\dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)\n" ] } ], "source": [ "raw_dataset_sst2 = load_dataset(\"glue\", \"sst2\")" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 68/68 [00:03<00:00, 18.46ba/s]\n", "100%|██████████| 1/1 [00:00<00:00, 16.67ba/s]\n", "100%|██████████| 2/2 [00:00<00:00, 16.67ba/s]\n" ] } ], "source": [ "dataset_to_tokenize = raw_dataset_sst2\n", "def tokenize_dynamic(example):\n", "    # tokenize one- or two-sentence datasets without hardcoding column names\n", "    dynamic_sentence_list = [x for x in list(example.keys()) if x not in ['label', 'idx']]\n", "    if len(dynamic_sentence_list) == 1:\n", "        return tokenizer(example[dynamic_sentence_list[0]], truncation=True)\n", "    else:\n", "        return tokenizer(example[dynamic_sentence_list[0]], example[dynamic_sentence_list[1]], truncation=True)\n", "tokenized_datasets = dataset_to_tokenize.map(tokenize_dynamic, batched=True)" ] },
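{ "cell_type": "markdown", "metadata": {}, "source": [ "A small added sanity check: the same `tokenize_dynamic` handles a pair dataset (mrpc) and a single-sentence dataset (sst2), because it reads the column names from the example itself." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(raw_datasets['train'].column_names)       # mrpc: two sentence columns\n", "print(raw_dataset_sst2['train'].column_names)   # sst2: one sentence column\n", "print(tokenized_datasets['train'].column_names) # tokenizer features were appended" ] },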
\"sentence1\", \"sentence2\"]}" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "data_collator = DataCollatorWithPadding(tokenizer=tokenizer)" ] }, { "cell_type": "code", "execution_count": 74, "metadata": {}, "outputs": [], "source": [ "padded_data = data_collator(samples)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Fine-tuning a model with Trainer API" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Reusing dataset glue (C:\\Users\\1seba\\.cache\\huggingface\\datasets\\glue\\mrpc\\1.0.0\\dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)\n", "100%|██████████| 4/4 [00:00<00:00, 5.85ba/s]\n", "100%|██████████| 1/1 [00:00<00:00, 14.49ba/s]\n", "100%|██████████| 2/2 [00:00<00:00, 6.37ba/s]\n" ] } ], "source": [ "# set up so far\n", "from datasets import load_dataset\n", "from transformers import AutoTokenizer, DataCollatorWithPadding\n", "\n", "raw_datasets = load_dataset(\"glue\", \"mrpc\")\n", "checkpoint = \"bert-base-uncased\"\n", "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n", "\n", "def tokenize_function(example):\n", " return tokenizer(example[\"sentence1\"], example[\"sentence2\"], truncation=True)\n", "\n", "tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)\n", "data_collator = DataCollatorWithPadding(tokenizer=tokenizer)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "from transformers import TrainingArguments\n", "from transformers import AutoModelForSequenceClassification" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "training_args = TrainingArguments(\"test-trainer\")\n", "model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 4/4 [00:00<00:00, 4.14ba/s]\n", "100%|██████████| 1/1 [00:00<00:00, 9.71ba/s]\n" ] } ], "source": [ "train_dataset = tokenized_datasets[\"train\"].filter(percentageOfItems)\n", "validation_dataset = tokenized_datasets[\"validation\"].filter(percentageOfItems)" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [], "source": [ "trainer = Trainer(\n", " model,\n", " training_args,\n", " train_dataset=train_dataset,\n", " eval_dataset=validation_dataset,\n", " data_collator=data_collator,\n", " tokenizer=tokenizer,\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ " 0%| | 0/132 [01:31) torch.Size([8, 2])\n" ] } ], "source": [ "outputs = model(**batch)\n", "print(outputs.loss, outputs.logits.shape)" ] }, { "cell_type": "code", "execution_count": 63, "metadata": {}, "outputs": [], "source": [ "from transformers import AdamW\n", "optimizer = AdamW(model.parameters(), lr=5e-5)" ] }, { "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "93\n" ] } ], "source": [ "from transformers import get_scheduler\n", "num_epochs = 3\n", "num_training_steps = num_epochs * len(train_dataloader)\n", "lr_scheduler = get_scheduler(\n", " 'linear',\n", " optimizer,\n", " num_warmup_steps=0,\n", " 
{ "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "93\n" ] } ], "source": [ "from transformers import get_scheduler\n", "num_epochs = 3\n", "num_training_steps = num_epochs * len(train_dataloader)\n", "lr_scheduler = get_scheduler(\n", "    'linear',\n", "    optimizer,\n", "    num_warmup_steps=0,\n", "    num_training_steps=num_training_steps,\n", ")\n", "print(num_training_steps)\n" ] }, { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "device(type='cuda')" ] }, "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import torch\n", "device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')\n", "model.to(device)\n", "device" ] }, { "cell_type": "code", "execution_count": 71, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 93/93 [08:50<00:00, 5.70s/it]\n", "100%|██████████| 93/93 [00:28<00:00, 3.21it/s]" ] } ], "source": [ "from tqdm.auto import tqdm\n", "progress_bar = tqdm(range(num_training_steps))\n", "model.train()\n", "for epoch in range(num_epochs):\n", "    for batch in train_dataloader:\n", "        batch = {k: v.to(device) for k, v in batch.items()}\n", "        outputs = model(**batch)\n", "        loss = outputs.loss\n", "        loss.backward()\n", "        optimizer.step()\n", "        lr_scheduler.step()  # was missing: without it the schedule never advances\n", "        optimizer.zero_grad()\n", "        progress_bar.update(1)" ] }, { "cell_type": "code", "execution_count": 109, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'accuracy': 0.6463414634146342, 'f1': 0.7851851851851851}" ] }, "execution_count": 109, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from datasets import load_metric\n", "metric = load_metric('glue', 'mrpc')\n", "model.eval()\n", "for batch in eval_dataloader:\n", "    batch = {k: v.to(device) for k, v in batch.items()}\n", "    with torch.no_grad():\n", "        outputs = model(**batch)\n", "    logits = outputs.logits\n", "    predictions = torch.argmax(logits, dim=-1)\n", "    metric.add_batch(predictions=predictions, references=batch['labels'])\n", "metric.compute()" ] },
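{ "cell_type": "markdown", "metadata": {}, "source": [ "An added aside on the numbers above: MRPC is label-imbalanced (roughly two-thirds of the pairs are paraphrases), which is why the GLUE metric reports F1 alongside accuracy. A quick count over the training labels shows the skew." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from collections import Counter\n", "print(Counter(raw_datasets['train']['label']))" ] },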
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Challenge 1" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Reusing dataset glue (C:\\Users\\1seba\\.cache\\huggingface\\datasets\\glue\\sst2\\1.0.0\\dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)\n", "100%|██████████| 68/68 [00:03<00:00, 20.33ba/s]\n", "100%|██████████| 1/1 [00:00<00:00, 17.24ba/s]\n", "100%|██████████| 2/2 [00:00<00:00, 16.53ba/s]\n" ] } ], "source": [ "device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')\n", "\n", "sst2_datasets = load_dataset(\"glue\", \"sst2\")\n", "def tokenize_function(example):\n", "    return tokenizer(example['sentence'], truncation=True)\n", "tokenized_datasets = sst2_datasets.map(tokenize_function, batched=True)\n", "tokenized_datasets = tokenized_datasets.remove_columns([\"idx\", \"sentence\"])\n", "tokenized_datasets = tokenized_datasets.rename_column('label', 'labels')\n", "tokenized_datasets.set_format('torch')\n", "data_collator = DataCollatorWithPadding(tokenizer=tokenizer)\n", "# despite the names, these two are DataLoaders over small shards of sst2\n", "train_dataset = DataLoader(\n", "    tokenized_datasets['train'].shard(num_shards=180, index=0), shuffle=True, batch_size=8, collate_fn=data_collator\n", ")\n", "eval_dataset = DataLoader(\n", "    tokenized_datasets['validation'].shard(num_shards=4, index=0), batch_size=8, collate_fn=data_collator\n", ")" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']\n", "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", "100%|██████████| 141/141 [18:15<00:00, 7.77s/it]\n", "100%|██████████| 141/141 [01:12<00:00, 2.21it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "[{'accuracy': 0.7568807339449541}, {'accuracy': 0.8256880733944955}, {'accuracy': 0.8623853211009175}]\n" ] } ], "source": [ "model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)\n", "model.to(device)\n", "optimizer = AdamW(model.parameters(), lr=5e-5)\n", "\n", "num_epochs = 3\n", "num_training_steps = num_epochs * len(train_dataset)\n", "lr_scheduler = get_scheduler(\n", "    'linear',\n", "    optimizer=optimizer,\n", "    num_warmup_steps=0,\n", "    num_training_steps=num_training_steps,\n", ")\n", "\n", "metrics = []\n", "\n", "progress_bar = tqdm(range(num_training_steps))\n", "for epoch in range(num_epochs):\n", "    model.train()  # re-enable dropout after the previous epoch's eval\n", "    for batch in train_dataset:\n", "        batch = {k: v.to(device) for k, v in batch.items()}\n", "        outputs = model(**batch)\n", "        loss = outputs.loss\n", "        loss.backward()\n", "        optimizer.step()\n", "        lr_scheduler.step()\n", "        optimizer.zero_grad()\n", "        progress_bar.update(1)\n", "\n", "    metric = load_metric(\"glue\", \"sst2\")\n", "    model.eval()\n", "    for batch in eval_dataset:\n", "        batch = {k: v.to(device) for k, v in batch.items()}\n", "        with torch.no_grad():\n", "            outputs = model(**batch)\n", "        logits = outputs.logits\n", "        predictions = torch.argmax(logits, dim=-1)\n", "        metric.add_batch(predictions=predictions, references=batch[\"labels\"])\n", "    metrics.append(metric.compute())\n", "\n", "print(metrics)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## (end)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "from accelerate import Accelerator\n", "accelerator = Accelerator()" ] },
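{ "cell_type": "markdown", "metadata": {}, "source": [ "An added check: constructed like this inside a notebook, `Accelerator` runs a single process and picks the best available device; `accelerator.prepare(...)` below then takes care of device placement, which is why the training loop that follows has no manual `.to(device)` calls." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(accelerator.device, accelerator.num_processes)" ] },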
{ "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight']\n", "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", "100%|██████████| 93/93 [01:11<00:00, 1.85it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "[{'accuracy': 0.6707317073170732}, {'accuracy': 0.7073170731707317}, {'accuracy': 0.7560975609756098}]\n" ] } ], "source": [ "model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)\n", "optimizer = AdamW(model.parameters(), lr=5e-5)\n", "train_dataloader, eval_dataloader, model, optimizer = accelerator.prepare(\n", "    train_dataloader, eval_dataloader, model, optimizer\n", ")\n", "\n", "num_epochs = 3\n", "num_training_steps = num_epochs * len(train_dataloader)\n", "lr_scheduler = get_scheduler(\n", "    'linear',\n", "    optimizer=optimizer,\n", "    num_warmup_steps=0,\n", "    num_training_steps=num_training_steps,\n", ")\n", "\n", "metrics = []\n", "\n", "progress_bar = tqdm(range(num_training_steps))\n", "for epoch in range(num_epochs):\n", "    model.train()  # re-enable dropout after the previous epoch's eval\n", "    for batch in train_dataloader:\n", "        outputs = model(**batch)\n", "        loss = outputs.loss\n", "        accelerator.backward(loss)\n", "        optimizer.step()\n", "        lr_scheduler.step()\n", "        optimizer.zero_grad()\n", "        progress_bar.update(1)\n", "\n", "    # these are the MRPC dataloaders from the full-training section; 'sst2' is\n", "    # used here only because its GLUE metric reports plain accuracy\n", "    metric = load_metric(\"glue\", \"sst2\")\n", "    model.eval()\n", "    for batch in eval_dataloader:\n", "        with torch.no_grad():\n", "            outputs = model(**batch)\n", "        logits = outputs.logits\n", "        predictions = torch.argmax(logits, dim=-1)\n", "        metric.add_batch(predictions=predictions, references=batch[\"labels\"])\n", "    metrics.append(metric.compute())\n", "\n", "print(metrics)" ] } ], "metadata": { "interpreter": { "hash": "c23364dc34acf6d559b2ccbb804894040b11f1b7cd300b891de29d32dea3c2c2" }, "kernelspec": { "display_name": "Python 3.8.10 64-bit ('AI': conda)", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" } }, "nbformat": 4, "nbformat_minor": 5 }