{
    "sourceFile": "trainml.py",
    "activeCommit": 0,
    "commits": [
        {
            "activePatchIndex": 15,
            "patches": [
                {
                    "date": 1708166375103,
                    "content": "Index: \n===================================================================\n--- \n+++ \n"
                },
                {
                    "date": 1708166792627,
                    "content": "Index: \n===================================================================\n--- \n+++ \n@@ -83,4 +83,7 @@\n     metric.add_batch(predictions=predictions, references=batch[\"labels\"])  # We compare our robot's answers to the correct answers.\n \n final_score = metric.compute()  # We calculate how well our robot did.\n print(final_score)  # We print out the score to see how well our robot solved the puzzles!\n+\n+model.save_pretrained(\"path/to/save/model\")\n+tokenizer.save_pretrained(\"path/to/save/tokenizer\")\n\\ No newline at end of file\n"
                },
                {
                    "date": 1708167245700,
                    "content": "Index: \n===================================================================\n--- \n+++ \n@@ -8,8 +8,9 @@\n from tqdm.auto import tqdm  # This is our progress bar, showing us how far we've come in solving the book.\n import evaluate  # This tells us how well we did in solving puzzles.\n from accelerate import Accelerator  # This makes everything go super fast, like a rocket!\n \n+def train_and_save_model():\n # Now, let's pick up the book we're going to solve today.\n raw_datasets = load_dataset(\"glue\", \"mrpc\")  # This is a book filled with puzzles about matching sentences.\n \n # Before we start solving puzzles, we need to understand the language they're written in.\n"
                },
                {
                    "date": 1708167252023,
                    "content": "Index: \n===================================================================\n--- \n+++ \n@@ -86,5 +86,5 @@\n final_score = metric.compute()  # We calculate how well our robot did.\n print(final_score)  # We print out the score to see how well our robot solved the puzzles!\n \n model.save_pretrained(\"path/to/save/model\")\n-tokenizer.save_pretrained(\"path/to/save/tokenizer\")\n\\ No newline at end of file\n+tokenizer.save_pretrained(\"path/to/save/tokenizer\")\n"
                },
                {
                    "date": 1708167259383,
                    "content": "Index: \n===================================================================\n--- \n+++ \n@@ -9,82 +9,82 @@\n import evaluate  # This tells us how well we did in solving puzzles.\n from accelerate import Accelerator  # This makes everything go super fast, like a rocket!\n \n def train_and_save_model():\n-# Now, let's pick up the book we're going to solve today.\n-raw_datasets = load_dataset(\"glue\", \"mrpc\")  # This is a book filled with puzzles about matching sentences.\n+    # Now, let's pick up the book we're going to solve today.\n+    raw_datasets = load_dataset(\"glue\", \"mrpc\")  # This is a book filled with puzzles about matching sentences.\n \n-# Before we start solving puzzles, we need to understand the language they're written in.\n-checkpoint = \"bert-base-uncased\"  # This is a guidebook to help us understand the puzzles' language.\n-tokenizer = AutoTokenizer.from_pretrained(checkpoint)  # This tool helps us read and understand the language in our book.\n+    # Before we start solving puzzles, we need to understand the language they're written in.\n+    checkpoint = \"bert-base-uncased\"  # This is a guidebook to help us understand the puzzles' language.\n+    tokenizer = AutoTokenizer.from_pretrained(checkpoint)  # This tool helps us read and understand the language in our book.\n \n-# To solve puzzles, we need to make sure we understand each sentence properly.\n-def tokenize_function(example):  # This is like reading each sentence carefully and understanding each word.\n-    return tokenizer(example[\"sentence1\"], example[\"sentence2\"], truncation=True)\n+    # To solve puzzles, we need to make sure we understand each sentence properly.\n+    def tokenize_function(example):  # This is like reading each sentence carefully and understanding each word.\n+        return tokenizer(example[\"sentence1\"], example[\"sentence2\"], truncation=True)\n \n-# We prepare all puzzles in the book so they're ready to solve.\n-tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)  # This is like marking all the important parts of the sentences.\n+    # We prepare all puzzles in the book so they're ready to solve.\n+    tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)  # This is like marking all the important parts of the sentences.\n \n-# Puzzles can be different sizes, but our puzzle solver works best when all puzzles are the same size.\n-data_collator = DataCollatorWithPadding(tokenizer=tokenizer)  # This adds extra paper to smaller puzzles to make them all the same size.\n+    # Puzzles can be different sizes, but our puzzle solver works best when all puzzles are the same size.\n+    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)  # This adds extra paper to smaller puzzles to make them all the same size.\n \n-# We're setting up our puzzle pages, making sure we're ready to solve them one by one.\n-tokenized_datasets = tokenized_datasets.remove_columns([\"sentence1\", \"sentence2\", \"idx\"])  # We remove stuff we don't need.\n-tokenized_datasets = tokenized_datasets.rename_column(\"label\", \"labels\")  # We make sure the puzzle answers are labeled correctly.\n-tokenized_datasets.set_format(\"torch\")  # We make sure our puzzles are in the right format for our brain to understand.\n+    # We're setting up our puzzle pages, making sure we're ready to solve them one by one.\n+    tokenized_datasets = tokenized_datasets.remove_columns([\"sentence1\", \"sentence2\", \"idx\"])  # We remove stuff we don't need.\n+    tokenized_datasets = tokenized_datasets.rename_column(\"label\", \"labels\")  # We make sure the puzzle answers are labeled correctly.\n+    tokenized_datasets.set_format(\"torch\")  # We make sure our puzzles are in the right format for our brain to understand.\n \n-# Now, we're ready to start solving puzzles, one page at a time.\n-train_dataloader = DataLoader(\n-    tokenized_datasets[\"train\"], shuffle=True, batch_size=8, collate_fn=data_collator\n-)  # This is our training puzzles.\n-eval_dataloader = DataLoader(\n-    tokenized_datasets[\"validation\"], batch_size=8, collate_fn=data_collator\n-)  # These are puzzles we use to check our progress.\n+    # Now, we're ready to start solving puzzles, one page at a time.\n+    train_dataloader = DataLoader(\n+        tokenized_datasets[\"train\"], shuffle=True, batch_size=8, collate_fn=data_collator\n+    )  # This is our training puzzles.\n+    eval_dataloader = DataLoader(\n+        tokenized_datasets[\"validation\"], batch_size=8, collate_fn=data_collator\n+    )  # These are puzzles we use to check our progress.\n \n-# We need a puzzle solver, which is specially trained to solve these types of puzzles.\n-model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)  # This is our puzzle-solving robot.\n+    # We need a puzzle solver, which is specially trained to solve these types of puzzles.\n+    model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)  # This is our puzzle-solving robot.\n \n-# Our robot needs instructions on how to get better at solving puzzles.\n-optimizer = AdamW(model.parameters(), lr=5e-5)  # This tells our robot how to improve.\n-num_epochs = 3  # This is how many times we'll go through the whole book of puzzles.\n-num_training_steps = num_epochs * len(train_dataloader)  # This is the total number of puzzles we'll solve.\n-lr_scheduler = get_scheduler(\n-    \"linear\",\n-    optimizer=optimizer,\n-    num_warmup_steps=0,\n-    num_training_steps=num_training_steps,\n-)  # This adjusts how quickly our robot learns over time.\n+    # Our robot needs instructions on how to get better at solving puzzles.\n+    optimizer = AdamW(model.parameters(), lr=5e-5)  # This tells our robot how to improve.\n+    num_epochs = 3  # This is how many times we'll go through the whole book of puzzles.\n+    num_training_steps = num_epochs * len(train_dataloader)  # This is the total number of puzzles we'll solve.\n+    lr_scheduler = get_scheduler(\n+        \"linear\",\n+        optimizer=optimizer,\n+        num_warmup_steps=0,\n+        num_training_steps=num_training_steps,\n+    )  # This adjusts how quickly our robot learns over time.\n \n-# To solve puzzles super fast, we're going to use a rocket!\n-accelerator = Accelerator()  # This is our rocket that makes everything go faster.\n-model, optimizer, train_dataloader, eval_dataloader = accelerator.prepare(\n-    model, optimizer, train_dataloader, eval_dataloader\n-)  # We make sure our robot, our puzzles, and our instructions are all ready for the rocket.\n+    # To solve puzzles super fast, we're going to use a rocket!\n+    accelerator = Accelerator()  # This is our rocket that makes everything go faster.\n+    model, optimizer, train_dataloader, eval_dataloader = accelerator.prepare(\n+        model, optimizer, train_dataloader, eval_dataloader\n+    )  # We make sure our robot, our puzzles, and our instructions are all ready for the rocket.\n \n-# It's time to start solving puzzles!\n-progress_bar = tqdm(range(num_training_steps))  # This shows us our progress.\n-model.train()  # We tell our robot it's time to start learning.\n-for epoch in range(num_epochs):  # We go through our book of puzzles multiple times to get really good.\n-    for batch in train_dataloader:  # Each time, we take a page of puzzles to solve.\n-        outputs = model(**batch)  # Our robot tries to solve the puzzles.\n-        loss = outputs.loss  # We check how many mistakes it made.\n-        accelerator.backward(loss)  # We give feedback to our robot so it can learn from its mistakes.\n-        optimizer.step()  # We update our robot's puzzle-solving strategy.\n-        lr_scheduler.step()  # We adjust how quickly our robot is learning.\n-        optimizer.zero_grad()  # We reset some settings to make sure our robot is ready for the next page.\n-        progress_bar.update(1)  # We update our progress bar to show how many puzzles we've solved.\n+    # It's time to start solving puzzles!\n+    progress_bar = tqdm(range(num_training_steps))  # This shows us our progress.\n+    model.train()  # We tell our robot it's time to start learning.\n+    for epoch in range(num_epochs):  # We go through our book of puzzles multiple times to get really good.\n+        for batch in train_dataloader:  # Each time, we take a page of puzzles to solve.\n+            outputs = model(**batch)  # Our robot tries to solve the puzzles.\n+            loss = outputs.loss  # We check how many mistakes it made.\n+            accelerator.backward(loss)  # We give feedback to our robot so it can learn from its mistakes.\n+            optimizer.step()  # We update our robot's puzzle-solving strategy.\n+            lr_scheduler.step()  # We adjust how quickly our robot is learning.\n+            optimizer.zero_grad()  # We reset some settings to make sure our robot is ready for the next page.\n+            progress_bar.update(1)  # We update our progress bar to show how many puzzles we've solved.\n \n-# After all that practice, it's time to test how good our robot has become at solving puzzles.\n-metric = evaluate.load(\"glue\", \"mrpc\")  # This is like the answer key to check our robot's work.\n-model.eval()  # We tell our robot it's time to show what it's learned.\n-for batch in eval_dataloader:  # We take a page of puzzles we haven't solved yet.\n-    with torch.no_grad():  # We make sure we're just testing, not learning anymore.\n-        outputs = model(**batch)  # Our robot solves the puzzles.\n-    logits = outputs.logits  # We look at our robot's answers.\n-    predictions = torch.argmax(logits, dim=-1)  # We decide which answer our robot thinks is right.\n-    metric.add_batch(predictions=predictions, references=batch[\"labels\"])  # We compare our robot's answers to the correct answers.\n+    # After all that practice, it's time to test how good our robot has become at solving puzzles.\n+    metric = evaluate.load(\"glue\", \"mrpc\")  # This is like the answer key to check our robot's work.\n+    model.eval()  # We tell our robot it's time to show what it's learned.\n+    for batch in eval_dataloader:  # We take a page of puzzles we haven't solved yet.\n+        with torch.no_grad():  # We make sure we're just testing, not learning anymore.\n+            outputs = model(**batch)  # Our robot solves the puzzles.\n+        logits = outputs.logits  # We look at our robot's answers.\n+        predictions = torch.argmax(logits, dim=-1)  # We decide which answer our robot thinks is right.\n+        metric.add_batch(predictions=predictions, references=batch[\"labels\"])  # We compare our robot's answers to the correct answers.\n \n-final_score = metric.compute()  # We calculate how well our robot did.\n-print(final_score)  # We print out the score to see how well our robot solved the puzzles!\n+    final_score = metric.compute()  # We calculate how well our robot did.\n+    print(final_score)  # We print out the score to see how well our robot solved the puzzles!\n \n-model.save_pretrained(\"path/to/save/model\")\n-tokenizer.save_pretrained(\"path/to/save/tokenizer\")\n+    model.save_pretrained(\"path/to/save/model\")\n+    tokenizer.save_pretrained(\"path/to/save/tokenizer\")\n"
                },
                {
                    "date": 1708167281057,
                    "content": "Index: \n===================================================================\n--- \n+++ \n@@ -87,4 +87,8 @@\n     print(final_score)  # We print out the score to see how well our robot solved the puzzles!\n \n     model.save_pretrained(\"path/to/save/model\")\n     tokenizer.save_pretrained(\"path/to/save/tokenizer\")\n+\n+if __name__ == \"__main__\":\n+    train_and_save_model()\n+\n"
                },
                {
                    "date": 1708176288213,
                    "content": "Index: \n===================================================================\n--- \n+++ \n@@ -44,9 +44,9 @@\n     model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)  # This is our puzzle-solving robot.\n \n     # Our robot needs instructions on how to get better at solving puzzles.\n     optimizer = AdamW(model.parameters(), lr=5e-5)  # This tells our robot how to improve.\n-    num_epochs = 3  # This is how many times we'll go through the whole book of puzzles.\n+    num_epochs = 1  # This is how many times we'll go through the whole book of puzzles.\n     num_training_steps = num_epochs * len(train_dataloader)  # This is the total number of puzzles we'll solve.\n     lr_scheduler = get_scheduler(\n         \"linear\",\n         optimizer=optimizer,\n"
                },
                {
                    "date": 1708257203286,
                    "content": "Index: \n===================================================================\n--- \n+++ \n@@ -10,9 +10,9 @@\n from accelerate import Accelerator  # This makes everything go super fast, like a rocket!\n \n def train_and_save_model():\n     # Now, let's pick up the book we're going to solve today.\n-    raw_datasets = load_dataset(\"glue\", \"mrpc\")  # This is a book filled with puzzles about matching sentences.\n+    raw_datasets = load_dataset(\"glue\")  # This is a book filled with puzzles about matching sentences.\n \n     # Before we start solving puzzles, we need to understand the language they're written in.\n     checkpoint = \"bert-base-uncased\"  # This is a guidebook to help us understand the puzzles' language.\n     tokenizer = AutoTokenizer.from_pretrained(checkpoint)  # This tool helps us read and understand the language in our book.\n"
                },
                {
                    "date": 1708257209627,
                    "content": "Index: \n===================================================================\n--- \n+++ \n@@ -73,9 +73,9 @@\n             optimizer.zero_grad()  # We reset some settings to make sure our robot is ready for the next page.\n             progress_bar.update(1)  # We update our progress bar to show how many puzzles we've solved.\n \n     # After all that practice, it's time to test how good our robot has become at solving puzzles.\n-    metric = evaluate.load(\"glue\", \"mrpc\")  # This is like the answer key to check our robot's work.\n+    metric = evaluate.load(\"glue\")  # This is like the answer key to check our robot's work.\n     model.eval()  # We tell our robot it's time to show what it's learned.\n     for batch in eval_dataloader:  # We take a page of puzzles we haven't solved yet.\n         with torch.no_grad():  # We make sure we're just testing, not learning anymore.\n             outputs = model(**batch)  # Our robot solves the puzzles.\n"
                },
                {
                    "date": 1708259156708,
                    "content": "Index: \n===================================================================\n--- \n+++ \n@@ -73,9 +73,9 @@\n             optimizer.zero_grad()  # We reset some settings to make sure our robot is ready for the next page.\n             progress_bar.update(1)  # We update our progress bar to show how many puzzles we've solved.\n \n     # After all that practice, it's time to test how good our robot has become at solving puzzles.\n-    metric = evaluate.load(\"glue\")  # This is like the answer key to check our robot's work.\n+    metric = evaluate.load(\"glue\", \"mrpc\")  # This is like the answer key to check our robot's work.\n     model.eval()  # We tell our robot it's time to show what it's learned.\n     for batch in eval_dataloader:  # We take a page of puzzles we haven't solved yet.\n         with torch.no_grad():  # We make sure we're just testing, not learning anymore.\n             outputs = model(**batch)  # Our robot solves the puzzles.\n"
                },
                {
                    "date": 1708259164291,
                    "content": "Index: \n===================================================================\n--- \n+++ \n@@ -10,9 +10,9 @@\n from accelerate import Accelerator  # This makes everything go super fast, like a rocket!\n \n def train_and_save_model():\n     # Now, let's pick up the book we're going to solve today.\n-    raw_datasets = load_dataset(\"glue\")  # This is a book filled with puzzles about matching sentences.\n+    raw_datasets = load_dataset(\"glue\", \"mrpc\")  # This is a book filled with puzzles about matching sentences.\n \n     # Before we start solving puzzles, we need to understand the language they're written in.\n     checkpoint = \"bert-base-uncased\"  # This is a guidebook to help us understand the puzzles' language.\n     tokenizer = AutoTokenizer.from_pretrained(checkpoint)  # This tool helps us read and understand the language in our book.\n@@ -44,9 +44,9 @@\n     model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)  # This is our puzzle-solving robot.\n \n     # Our robot needs instructions on how to get better at solving puzzles.\n     optimizer = AdamW(model.parameters(), lr=5e-5)  # This tells our robot how to improve.\n-    num_epochs = 1  # This is how many times we'll go through the whole book of puzzles.\n+    num_epochs =   # This is how many times we'll go through the whole book of puzzles.\n     num_training_steps = num_epochs * len(train_dataloader)  # This is the total number of puzzles we'll solve.\n     lr_scheduler = get_scheduler(\n         \"linear\",\n         optimizer=optimizer,\n"
                },
                {
                    "date": 1708259362403,
                    "content": "Index: \n===================================================================\n--- \n+++ \n@@ -13,9 +13,9 @@\n     # Now, let's pick up the book we're going to solve today.\n     raw_datasets = load_dataset(\"glue\", \"mrpc\")  # This is a book filled with puzzles about matching sentences.\n \n     # Before we start solving puzzles, we need to understand the language they're written in.\n-    checkpoint = \"bert-base-uncased\"  # This is a guidebook to help us understand the puzzles' language.\n+    checkpoint = \"bert\"-base-uncased  # This is a guidebook to help us understand the puzzles' language.\n     tokenizer = AutoTokenizer.from_pretrained(checkpoint)  # This tool helps us read and understand the language in our book.\n \n     # To solve puzzles, we need to make sure we understand each sentence properly.\n     def tokenize_function(example):  # This is like reading each sentence carefully and understanding each word.\n"
                },
                {
                    "date": 1708259368829,
                    "content": "Index: \n===================================================================\n--- \n+++ \n@@ -13,9 +13,9 @@\n     # Now, let's pick up the book we're going to solve today.\n     raw_datasets = load_dataset(\"glue\", \"mrpc\")  # This is a book filled with puzzles about matching sentences.\n \n     # Before we start solving puzzles, we need to understand the language they're written in.\n-    checkpoint = \"bert\"-base-uncased  # This is a guidebook to help us understand the puzzles' language.\n+    checkpoint = \"distilbert-base-uncased\"  # This is a guidebook to help us understand the puzzles' language.\n     tokenizer = AutoTokenizer.from_pretrained(checkpoint)  # This tool helps us read and understand the language in our book.\n \n     # To solve puzzles, we need to make sure we understand each sentence properly.\n     def tokenize_function(example):  # This is like reading each sentence carefully and understanding each word.\n"
                },
                {
                    "date": 1708259574161,
                    "content": "Index: \n===================================================================\n--- \n+++ \n@@ -54,9 +54,9 @@\n         num_training_steps=num_training_steps,\n     )  # This adjusts how quickly our robot learns over time.\n \n     # To solve puzzles super fast, we're going to use a rocket!\n-    accelerator = Accelerator()  # This is our rocket that makes everything go faster.\n+    accelerator = Accelerator(fp16=True)  # This is our rocket that makes everything go faster.\n     model, optimizer, train_dataloader, eval_dataloader = accelerator.prepare(\n         model, optimizer, train_dataloader, eval_dataloader\n     )  # We make sure our robot, our puzzles, and our instructions are all ready for the rocket.\n \n"
                },
                {
                    "date": 1708259707398,
                    "content": "Index: \n===================================================================\n--- \n+++ \n@@ -44,9 +44,9 @@\n     model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)  # This is our puzzle-solving robot.\n \n     # Our robot needs instructions on how to get better at solving puzzles.\n     optimizer = AdamW(model.parameters(), lr=5e-5)  # This tells our robot how to improve.\n-    num_epochs =   # This is how many times we'll go through the whole book of puzzles.\n+    num_epochs =  1 # This is how many times we'll go through the whole book of puzzles.\n     num_training_steps = num_epochs * len(train_dataloader)  # This is the total number of puzzles we'll solve.\n     lr_scheduler = get_scheduler(\n         \"linear\",\n         optimizer=optimizer,\n"
                },
                {
                    "date": 1708262141106,
                    "content": "Index: \n===================================================================\n--- \n+++ \n@@ -54,9 +54,9 @@\n         num_training_steps=num_training_steps,\n     )  # This adjusts how quickly our robot learns over time.\n \n     # To solve puzzles super fast, we're going to use a rocket!\n-    accelerator = Accelerator(fp16=True)  # This is our rocket that makes everything go faster.\n+    accelerator = Accelerator()  # This is our rocket that makes everything go faster.\n     model, optimizer, train_dataloader, eval_dataloader = accelerator.prepare(\n         model, optimizer, train_dataloader, eval_dataloader\n     )  # We make sure our robot, our puzzles, and our instructions are all ready for the rocket.\n \n"
                }
            ],
            "date": 1708166375103,
            "name": "Commit-0",
            "content": "# First, we grab tools from our toolbox. These tools help us with different tasks like reading books (datasets),\n# learning new languages (tokenization), and solving puzzles (models).\nfrom datasets import load_dataset  # This tool helps us get our book, where the puzzles are.\nfrom transformers import AutoTokenizer, AutoModelForSequenceClassification, AdamW, get_scheduler  # These help us understand and solve puzzles.\nfrom transformers import DataCollatorWithPadding  # This makes sure all puzzle pieces are the same size.\nfrom torch.utils.data import DataLoader  # This helps us handle one page of puzzles at a time.\nimport torch  # This is like the brain of our operations, helping us think through puzzles.\nfrom tqdm.auto import tqdm  # This is our progress bar, showing us how far we've come in solving the book.\nimport evaluate  # This tells us how well we did in solving puzzles.\nfrom accelerate import Accelerator  # This makes everything go super fast, like a rocket!\n\n# Now, let's pick up the book we're going to solve today.\nraw_datasets = load_dataset(\"glue\", \"mrpc\")  # This is a book filled with puzzles about matching sentences.\n\n# Before we start solving puzzles, we need to understand the language they're written in.\ncheckpoint = \"bert-base-uncased\"  # This is a guidebook to help us understand the puzzles' language.\ntokenizer = AutoTokenizer.from_pretrained(checkpoint)  # This tool helps us read and understand the language in our book.\n\n# To solve puzzles, we need to make sure we understand each sentence properly.\ndef tokenize_function(example):  # This is like reading each sentence carefully and understanding each word.\n    return tokenizer(example[\"sentence1\"], example[\"sentence2\"], truncation=True)\n\n# We prepare all puzzles in the book so they're ready to solve.\ntokenized_datasets = raw_datasets.map(tokenize_function, batched=True)  # This is like marking all the important parts of the sentences.\n\n# Puzzles can be different sizes, but our puzzle solver works best when all puzzles are the same size.\ndata_collator = DataCollatorWithPadding(tokenizer=tokenizer)  # This adds extra paper to smaller puzzles to make them all the same size.\n\n# We're setting up our puzzle pages, making sure we're ready to solve them one by one.\ntokenized_datasets = tokenized_datasets.remove_columns([\"sentence1\", \"sentence2\", \"idx\"])  # We remove stuff we don't need.\ntokenized_datasets = tokenized_datasets.rename_column(\"label\", \"labels\")  # We make sure the puzzle answers are labeled correctly.\ntokenized_datasets.set_format(\"torch\")  # We make sure our puzzles are in the right format for our brain to understand.\n\n# Now, we're ready to start solving puzzles, one page at a time.\ntrain_dataloader = DataLoader(\n    tokenized_datasets[\"train\"], shuffle=True, batch_size=8, collate_fn=data_collator\n)  # This is our training puzzles.\neval_dataloader = DataLoader(\n    tokenized_datasets[\"validation\"], batch_size=8, collate_fn=data_collator\n)  # These are puzzles we use to check our progress.\n\n# We need a puzzle solver, which is specially trained to solve these types of puzzles.\nmodel = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)  # This is our puzzle-solving robot.\n\n# Our robot needs instructions on how to get better at solving puzzles.\noptimizer = AdamW(model.parameters(), lr=5e-5)  # This tells our robot how to improve.\nnum_epochs = 3  # This is how many times we'll go through the whole book of puzzles.\nnum_training_steps = num_epochs * len(train_dataloader)  # This is the total number of puzzles we'll solve.\nlr_scheduler = get_scheduler(\n    \"linear\",\n    optimizer=optimizer,\n    num_warmup_steps=0,\n    num_training_steps=num_training_steps,\n)  # This adjusts how quickly our robot learns over time.\n\n# To solve puzzles super fast, we're going to use a rocket!\naccelerator = Accelerator()  # This is our rocket that makes everything go faster.\nmodel, optimizer, train_dataloader, eval_dataloader = accelerator.prepare(\n    model, optimizer, train_dataloader, eval_dataloader\n)  # We make sure our robot, our puzzles, and our instructions are all ready for the rocket.\n\n# It's time to start solving puzzles!\nprogress_bar = tqdm(range(num_training_steps))  # This shows us our progress.\nmodel.train()  # We tell our robot it's time to start learning.\nfor epoch in range(num_epochs):  # We go through our book of puzzles multiple times to get really good.\n    for batch in train_dataloader:  # Each time, we take a page of puzzles to solve.\n        outputs = model(**batch)  # Our robot tries to solve the puzzles.\n        loss = outputs.loss  # We check how many mistakes it made.\n        accelerator.backward(loss)  # We give feedback to our robot so it can learn from its mistakes.\n        optimizer.step()  # We update our robot's puzzle-solving strategy.\n        lr_scheduler.step()  # We adjust how quickly our robot is learning.\n        optimizer.zero_grad()  # We reset some settings to make sure our robot is ready for the next page.\n        progress_bar.update(1)  # We update our progress bar to show how many puzzles we've solved.\n\n# After all that practice, it's time to test how good our robot has become at solving puzzles.\nmetric = evaluate.load(\"glue\", \"mrpc\")  # This is like the answer key to check our robot's work.\nmodel.eval()  # We tell our robot it's time to show what it's learned.\nfor batch in eval_dataloader:  # We take a page of puzzles we haven't solved yet.\n    with torch.no_grad():  # We make sure we're just testing, not learning anymore.\n        outputs = model(**batch)  # Our robot solves the puzzles.\n    logits = outputs.logits  # We look at our robot's answers.\n    predictions = torch.argmax(logits, dim=-1)  # We decide which answer our robot thinks is right.\n    metric.add_batch(predictions=predictions, references=batch[\"labels\"])  # We compare our robot's answers to the correct answers.\n\nfinal_score = metric.compute()  # We calculate how well our robot did.\nprint(final_score)  # We print out the score to see how well our robot solved the puzzles!\n"
        }
    ]
}
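
Note: the JSON above is a per-file edit history of trainml.py as written by a local-history style editor extension (the exact tooling is an assumption; it is not named in the record). Each entry in "commits" stores a full "content" snapshot of the file plus an ordered list of unified-diff "patches" applied on top of it; "activePatchIndex" marks the currently restored patch state, and every "date" is a Unix timestamp in milliseconds. A minimal Python sketch for inspecting such a file follows; the trainml.py.json path is a hypothetical placeholder.

import json
from datetime import datetime, timezone
from pathlib import Path

# Hypothetical path to the history file shown above; adjust as needed.
history = json.loads(Path("trainml.py.json").read_text(encoding="utf-8"))

print("history for:", history["sourceFile"])
for commit in history["commits"]:
    # "date" values are Unix epoch milliseconds, hence the / 1000.
    base = datetime.fromtimestamp(commit["date"] / 1000, tz=timezone.utc)
    print(f"{commit['name']}: base snapshot from {base:%Y-%m-%d %H:%M} UTC")
    for i, patch in enumerate(commit["patches"]):
        when = datetime.fromtimestamp(patch["date"] / 1000, tz=timezone.utc)
        active = "  <- activePatchIndex" if i == commit["activePatchIndex"] else ""
        hunks = patch["content"].count("@@ -")  # count unified-diff hunk headers
        print(f"  patch {i:2d}: {when:%Y-%m-%d %H:%M} UTC, {hunks} hunk(s){active}")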