Spaces:

apipulse
/

test

Runtime error

App Files Files Community

Imvikram99 committed on Feb 17, 2024

Commit

e61ddcf

1 Parent(s): bc8c903

train and use

Browse files

Files changed (10) hide show

.history/app_20240217162502.py +37 -0
.history/app_20240217162512.py +37 -0
.history/trainml_20240217162405.py +90 -0
.history/trainml_20240217162411.py +90 -0
.history/trainml_20240217162419.py +90 -0
.history/trainml_20240217162441.py +94 -0
.lh/app.py.json +9 -1
.lh/trainml.py.json +17 -1
app.py +2 -0
trainml.py +83 -78

.history/app_20240217162502.py ADDED Viewed

	@@ -0,0 +1,37 @@

+import gradio as gr
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+from trainml import train_and_save_model  # Import the training function
+# Load the trained model and tokenizer
+model_path = "path/to/save/model"
+tokenizer_path = "path/to/save/tokenizer"
+model = AutoModelForSequenceClassification.from_pretrained(model_path)
+tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
+model.eval()  # Set model to evaluation mode
+def predict_paraphrase(sentence1, sentence2):
+    # Tokenize the input sentences
+    inputs = tokenizer(sentence1, sentence2, return_tensors="pt", padding=True, truncation=True)
+    with torch.no_grad():
+        outputs = model(**inputs)
+    # Get probabilities
+    probs = torch.nn.functional.softmax(outputs.logits, dim=-1).tolist()[0]
+    # Assuming the first class (index 0) is 'not paraphrase' and the second class (index 1) is 'paraphrase'
+    return {"Not Paraphrase": probs[0], "Paraphrase": probs[1]}
+# Create Gradio interface
+iface = gr.Interface(
+    fn=predict_paraphrase,
+    inputs=[gr.inputs.Textbox(lines=2, placeholder="Enter Sentence 1 Here..."),
+            gr.inputs.Textbox(lines=2, placeholder="Enter Sentence 2 Here...")],
+    outputs=gr.outputs.Label(num_top_classes=2),
+    title="Paraphrase Identification",
+    description="This model predicts whether two sentences are paraphrases of each other."
+)
+iface.launch()

.history/app_20240217162512.py ADDED Viewed

	@@ -0,0 +1,37 @@

+import gradio as gr
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+from trainml import train_and_save_model  # Import the training function
+train_and_save_model()
+# Load the trained model and tokenizer
+model_path = "path/to/save/model"
+tokenizer_path = "path/to/save/tokenizer"
+model = AutoModelForSequenceClassification.from_pretrained(model_path)
+tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
+model.eval()  # Set model to evaluation mode
+def predict_paraphrase(sentence1, sentence2):
+    # Tokenize the input sentences
+    inputs = tokenizer(sentence1, sentence2, return_tensors="pt", padding=True, truncation=True)
+    with torch.no_grad():
+        outputs = model(**inputs)
+    # Get probabilities
+    probs = torch.nn.functional.softmax(outputs.logits, dim=-1).tolist()[0]
+    # Assuming the first class (index 0) is 'not paraphrase' and the second class (index 1) is 'paraphrase'
+    return {"Not Paraphrase": probs[0], "Paraphrase": probs[1]}
+# Create Gradio interface
+iface = gr.Interface(
+    fn=predict_paraphrase,
+    inputs=[gr.inputs.Textbox(lines=2, placeholder="Enter Sentence 1 Here..."),
+            gr.inputs.Textbox(lines=2, placeholder="Enter Sentence 2 Here...")],
+    outputs=gr.outputs.Label(num_top_classes=2),
+    title="Paraphrase Identification",
+    description="This model predicts whether two sentences are paraphrases of each other."
+)
+iface.launch()

.history/trainml_20240217162405.py ADDED Viewed

	@@ -0,0 +1,90 @@

+# First, we grab tools from our toolbox. These tools help us with different tasks like reading books (datasets),
+# learning new languages (tokenization), and solving puzzles (models).
+from datasets import load_dataset  # This tool helps us get our book, where the puzzles are.
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, AdamW, get_scheduler  # These help us understand and solve puzzles.
+from transformers import DataCollatorWithPadding  # This makes sure all puzzle pieces are the same size.
+from torch.utils.data import DataLoader  # This helps us handle one page of puzzles at a time.
+import torch  # This is like the brain of our operations, helping us think through puzzles.
+from tqdm.auto import tqdm  # This is our progress bar, showing us how far we've come in solving the book.
+import evaluate  # This tells us how well we did in solving puzzles.
+from accelerate import Accelerator  # This makes everything go super fast, like a rocket!
+def train_and_save_model():
+# Now, let's pick up the book we're going to solve today.
+raw_datasets = load_dataset("glue", "mrpc")  # This is a book filled with puzzles about matching sentences.
+# Before we start solving puzzles, we need to understand the language they're written in.
+checkpoint = "bert-base-uncased"  # This is a guidebook to help us understand the puzzles' language.
+tokenizer = AutoTokenizer.from_pretrained(checkpoint)  # This tool helps us read and understand the language in our book.
+# To solve puzzles, we need to make sure we understand each sentence properly.
+def tokenize_function(example):  # This is like reading each sentence carefully and understanding each word.
+    return tokenizer(example["sentence1"], example["sentence2"], truncation=True)
+# We prepare all puzzles in the book so they're ready to solve.
+tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)  # This is like marking all the important parts of the sentences.
+# Puzzles can be different sizes, but our puzzle solver works best when all puzzles are the same size.
+data_collator = DataCollatorWithPadding(tokenizer=tokenizer)  # This adds extra paper to smaller puzzles to make them all the same size.
+# We're setting up our puzzle pages, making sure we're ready to solve them one by one.
+tokenized_datasets = tokenized_datasets.remove_columns(["sentence1", "sentence2", "idx"])  # We remove stuff we don't need.
+tokenized_datasets = tokenized_datasets.rename_column("label", "labels")  # We make sure the puzzle answers are labeled correctly.
+tokenized_datasets.set_format("torch")  # We make sure our puzzles are in the right format for our brain to understand.
+# Now, we're ready to start solving puzzles, one page at a time.
+train_dataloader = DataLoader(
+    tokenized_datasets["train"], shuffle=True, batch_size=8, collate_fn=data_collator
+)  # This is our training puzzles.
+eval_dataloader = DataLoader(
+    tokenized_datasets["validation"], batch_size=8, collate_fn=data_collator
+)  # These are puzzles we use to check our progress.
+# We need a puzzle solver, which is specially trained to solve these types of puzzles.
+model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)  # This is our puzzle-solving robot.
+# Our robot needs instructions on how to get better at solving puzzles.
+optimizer = AdamW(model.parameters(), lr=5e-5)  # This tells our robot how to improve.
+num_epochs = 3  # This is how many times we'll go through the whole book of puzzles.
+num_training_steps = num_epochs * len(train_dataloader)  # This is the total number of puzzles we'll solve.
+lr_scheduler = get_scheduler(
+    "linear",
+    optimizer=optimizer,
+    num_warmup_steps=0,
+    num_training_steps=num_training_steps,
+)  # This adjusts how quickly our robot learns over time.
+# To solve puzzles super fast, we're going to use a rocket!
+accelerator = Accelerator()  # This is our rocket that makes everything go faster.
+model, optimizer, train_dataloader, eval_dataloader = accelerator.prepare(
+    model, optimizer, train_dataloader, eval_dataloader
+)  # We make sure our robot, our puzzles, and our instructions are all ready for the rocket.
+# It's time to start solving puzzles!
+progress_bar = tqdm(range(num_training_steps))  # This shows us our progress.
+model.train()  # We tell our robot it's time to start learning.
+for epoch in range(num_epochs):  # We go through our book of puzzles multiple times to get really good.
+    for batch in train_dataloader:  # Each time, we take a page of puzzles to solve.
+        outputs = model(**batch)  # Our robot tries to solve the puzzles.
+        loss = outputs.loss  # We check how many mistakes it made.
+        accelerator.backward(loss)  # We give feedback to our robot so it can learn from its mistakes.
+        optimizer.step()  # We update our robot's puzzle-solving strategy.
+        lr_scheduler.step()  # We adjust how quickly our robot is learning.
+        optimizer.zero_grad()  # We reset some settings to make sure our robot is ready for the next page.
+        progress_bar.update(1)  # We update our progress bar to show how many puzzles we've solved.
+# After all that practice, it's time to test how good our robot has become at solving puzzles.
+metric = evaluate.load("glue", "mrpc")  # This is like the answer key to check our robot's work.
+model.eval()  # We tell our robot it's time to show what it's learned.
+for batch in eval_dataloader:  # We take a page of puzzles we haven't solved yet.
+    with torch.no_grad():  # We make sure we're just testing, not learning anymore.
+        outputs = model(**batch)  # Our robot solves the puzzles.
+    logits = outputs.logits  # We look at our robot's answers.
+    predictions = torch.argmax(logits, dim=-1)  # We decide which answer our robot thinks is right.
+    metric.add_batch(predictions=predictions, references=batch["labels"])  # We compare our robot's answers to the correct answers.
+final_score = metric.compute()  # We calculate how well our robot did.
+print(final_score)  # We print out the score to see how well our robot solved the puzzles!
+model.save_pretrained("path/to/save/model")
+tokenizer.save_pretrained("path/to/save/tokenizer")

.history/trainml_20240217162411.py ADDED Viewed

	@@ -0,0 +1,90 @@

+# First, we grab tools from our toolbox. These tools help us with different tasks like reading books (datasets),
+# learning new languages (tokenization), and solving puzzles (models).
+from datasets import load_dataset  # This tool helps us get our book, where the puzzles are.
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, AdamW, get_scheduler  # These help us understand and solve puzzles.
+from transformers import DataCollatorWithPadding  # This makes sure all puzzle pieces are the same size.
+from torch.utils.data import DataLoader  # This helps us handle one page of puzzles at a time.
+import torch  # This is like the brain of our operations, helping us think through puzzles.
+from tqdm.auto import tqdm  # This is our progress bar, showing us how far we've come in solving the book.
+import evaluate  # This tells us how well we did in solving puzzles.
+from accelerate import Accelerator  # This makes everything go super fast, like a rocket!
+def train_and_save_model():
+# Now, let's pick up the book we're going to solve today.
+raw_datasets = load_dataset("glue", "mrpc")  # This is a book filled with puzzles about matching sentences.
+# Before we start solving puzzles, we need to understand the language they're written in.
+checkpoint = "bert-base-uncased"  # This is a guidebook to help us understand the puzzles' language.
+tokenizer = AutoTokenizer.from_pretrained(checkpoint)  # This tool helps us read and understand the language in our book.
+# To solve puzzles, we need to make sure we understand each sentence properly.
+def tokenize_function(example):  # This is like reading each sentence carefully and understanding each word.
+    return tokenizer(example["sentence1"], example["sentence2"], truncation=True)
+# We prepare all puzzles in the book so they're ready to solve.
+tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)  # This is like marking all the important parts of the sentences.
+# Puzzles can be different sizes, but our puzzle solver works best when all puzzles are the same size.
+data_collator = DataCollatorWithPadding(tokenizer=tokenizer)  # This adds extra paper to smaller puzzles to make them all the same size.
+# We're setting up our puzzle pages, making sure we're ready to solve them one by one.
+tokenized_datasets = tokenized_datasets.remove_columns(["sentence1", "sentence2", "idx"])  # We remove stuff we don't need.
+tokenized_datasets = tokenized_datasets.rename_column("label", "labels")  # We make sure the puzzle answers are labeled correctly.
+tokenized_datasets.set_format("torch")  # We make sure our puzzles are in the right format for our brain to understand.
+# Now, we're ready to start solving puzzles, one page at a time.
+train_dataloader = DataLoader(
+    tokenized_datasets["train"], shuffle=True, batch_size=8, collate_fn=data_collator
+)  # This is our training puzzles.
+eval_dataloader = DataLoader(
+    tokenized_datasets["validation"], batch_size=8, collate_fn=data_collator
+)  # These are puzzles we use to check our progress.
+# We need a puzzle solver, which is specially trained to solve these types of puzzles.
+model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)  # This is our puzzle-solving robot.
+# Our robot needs instructions on how to get better at solving puzzles.
+optimizer = AdamW(model.parameters(), lr=5e-5)  # This tells our robot how to improve.
+num_epochs = 3  # This is how many times we'll go through the whole book of puzzles.
+num_training_steps = num_epochs * len(train_dataloader)  # This is the total number of puzzles we'll solve.
+lr_scheduler = get_scheduler(
+    "linear",
+    optimizer=optimizer,
+    num_warmup_steps=0,
+    num_training_steps=num_training_steps,
+)  # This adjusts how quickly our robot learns over time.
+# To solve puzzles super fast, we're going to use a rocket!
+accelerator = Accelerator()  # This is our rocket that makes everything go faster.
+model, optimizer, train_dataloader, eval_dataloader = accelerator.prepare(
+    model, optimizer, train_dataloader, eval_dataloader
+)  # We make sure our robot, our puzzles, and our instructions are all ready for the rocket.
+# It's time to start solving puzzles!
+progress_bar = tqdm(range(num_training_steps))  # This shows us our progress.
+model.train()  # We tell our robot it's time to start learning.
+for epoch in range(num_epochs):  # We go through our book of puzzles multiple times to get really good.
+    for batch in train_dataloader:  # Each time, we take a page of puzzles to solve.
+        outputs = model(**batch)  # Our robot tries to solve the puzzles.
+        loss = outputs.loss  # We check how many mistakes it made.
+        accelerator.backward(loss)  # We give feedback to our robot so it can learn from its mistakes.
+        optimizer.step()  # We update our robot's puzzle-solving strategy.
+        lr_scheduler.step()  # We adjust how quickly our robot is learning.
+        optimizer.zero_grad()  # We reset some settings to make sure our robot is ready for the next page.
+        progress_bar.update(1)  # We update our progress bar to show how many puzzles we've solved.
+# After all that practice, it's time to test how good our robot has become at solving puzzles.
+metric = evaluate.load("glue", "mrpc")  # This is like the answer key to check our robot's work.
+model.eval()  # We tell our robot it's time to show what it's learned.
+for batch in eval_dataloader:  # We take a page of puzzles we haven't solved yet.
+    with torch.no_grad():  # We make sure we're just testing, not learning anymore.
+        outputs = model(**batch)  # Our robot solves the puzzles.
+    logits = outputs.logits  # We look at our robot's answers.
+    predictions = torch.argmax(logits, dim=-1)  # We decide which answer our robot thinks is right.
+    metric.add_batch(predictions=predictions, references=batch["labels"])  # We compare our robot's answers to the correct answers.
+final_score = metric.compute()  # We calculate how well our robot did.
+print(final_score)  # We print out the score to see how well our robot solved the puzzles!
+model.save_pretrained("path/to/save/model")
+tokenizer.save_pretrained("path/to/save/tokenizer")

.history/trainml_20240217162419.py ADDED Viewed

	@@ -0,0 +1,90 @@

+# First, we grab tools from our toolbox. These tools help us with different tasks like reading books (datasets),
+# learning new languages (tokenization), and solving puzzles (models).
+from datasets import load_dataset  # This tool helps us get our book, where the puzzles are.
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, AdamW, get_scheduler  # These help us understand and solve puzzles.
+from transformers import DataCollatorWithPadding  # This makes sure all puzzle pieces are the same size.
+from torch.utils.data import DataLoader  # This helps us handle one page of puzzles at a time.
+import torch  # This is like the brain of our operations, helping us think through puzzles.
+from tqdm.auto import tqdm  # This is our progress bar, showing us how far we've come in solving the book.
+import evaluate  # This tells us how well we did in solving puzzles.
+from accelerate import Accelerator  # This makes everything go super fast, like a rocket!
+def train_and_save_model():
+    # Now, let's pick up the book we're going to solve today.
+    raw_datasets = load_dataset("glue", "mrpc")  # This is a book filled with puzzles about matching sentences.
+    # Before we start solving puzzles, we need to understand the language they're written in.
+    checkpoint = "bert-base-uncased"  # This is a guidebook to help us understand the puzzles' language.
+    tokenizer = AutoTokenizer.from_pretrained(checkpoint)  # This tool helps us read and understand the language in our book.
+    # To solve puzzles, we need to make sure we understand each sentence properly.
+    def tokenize_function(example):  # This is like reading each sentence carefully and understanding each word.
+        return tokenizer(example["sentence1"], example["sentence2"], truncation=True)
+    # We prepare all puzzles in the book so they're ready to solve.
+    tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)  # This is like marking all the important parts of the sentences.
+    # Puzzles can be different sizes, but our puzzle solver works best when all puzzles are the same size.
+    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)  # This adds extra paper to smaller puzzles to make them all the same size.
+    # We're setting up our puzzle pages, making sure we're ready to solve them one by one.
+    tokenized_datasets = tokenized_datasets.remove_columns(["sentence1", "sentence2", "idx"])  # We remove stuff we don't need.
+    tokenized_datasets = tokenized_datasets.rename_column("label", "labels")  # We make sure the puzzle answers are labeled correctly.
+    tokenized_datasets.set_format("torch")  # We make sure our puzzles are in the right format for our brain to understand.
+    # Now, we're ready to start solving puzzles, one page at a time.
+    train_dataloader = DataLoader(
+        tokenized_datasets["train"], shuffle=True, batch_size=8, collate_fn=data_collator
+    )  # This is our training puzzles.
+    eval_dataloader = DataLoader(
+        tokenized_datasets["validation"], batch_size=8, collate_fn=data_collator
+    )  # These are puzzles we use to check our progress.
+    # We need a puzzle solver, which is specially trained to solve these types of puzzles.
+    model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)  # This is our puzzle-solving robot.
+    # Our robot needs instructions on how to get better at solving puzzles.
+    optimizer = AdamW(model.parameters(), lr=5e-5)  # This tells our robot how to improve.
+    num_epochs = 3  # This is how many times we'll go through the whole book of puzzles.
+    num_training_steps = num_epochs * len(train_dataloader)  # This is the total number of puzzles we'll solve.
+    lr_scheduler = get_scheduler(
+        "linear",
+        optimizer=optimizer,
+        num_warmup_steps=0,
+        num_training_steps=num_training_steps,
+    )  # This adjusts how quickly our robot learns over time.
+    # To solve puzzles super fast, we're going to use a rocket!
+    accelerator = Accelerator()  # This is our rocket that makes everything go faster.
+    model, optimizer, train_dataloader, eval_dataloader = accelerator.prepare(
+        model, optimizer, train_dataloader, eval_dataloader
+    )  # We make sure our robot, our puzzles, and our instructions are all ready for the rocket.
+    # It's time to start solving puzzles!
+    progress_bar = tqdm(range(num_training_steps))  # This shows us our progress.
+    model.train()  # We tell our robot it's time to start learning.
+    for epoch in range(num_epochs):  # We go through our book of puzzles multiple times to get really good.
+        for batch in train_dataloader:  # Each time, we take a page of puzzles to solve.
+            outputs = model(**batch)  # Our robot tries to solve the puzzles.
+            loss = outputs.loss  # We check how many mistakes it made.
+            accelerator.backward(loss)  # We give feedback to our robot so it can learn from its mistakes.
+            optimizer.step()  # We update our robot's puzzle-solving strategy.
+            lr_scheduler.step()  # We adjust how quickly our robot is learning.
+            optimizer.zero_grad()  # We reset some settings to make sure our robot is ready for the next page.
+            progress_bar.update(1)  # We update our progress bar to show how many puzzles we've solved.
+    # After all that practice, it's time to test how good our robot has become at solving puzzles.
+    metric = evaluate.load("glue", "mrpc")  # This is like the answer key to check our robot's work.
+    model.eval()  # We tell our robot it's time to show what it's learned.
+    for batch in eval_dataloader:  # We take a page of puzzles we haven't solved yet.
+        with torch.no_grad():  # We make sure we're just testing, not learning anymore.
+            outputs = model(**batch)  # Our robot solves the puzzles.
+        logits = outputs.logits  # We look at our robot's answers.
+        predictions = torch.argmax(logits, dim=-1)  # We decide which answer our robot thinks is right.
+        metric.add_batch(predictions=predictions, references=batch["labels"])  # We compare our robot's answers to the correct answers.
+    final_score = metric.compute()  # We calculate how well our robot did.
+    print(final_score)  # We print out the score to see how well our robot solved the puzzles!
+    model.save_pretrained("path/to/save/model")
+    tokenizer.save_pretrained("path/to/save/tokenizer")

.history/trainml_20240217162441.py ADDED Viewed

	@@ -0,0 +1,94 @@

+# First, we grab tools from our toolbox. These tools help us with different tasks like reading books (datasets),
+# learning new languages (tokenization), and solving puzzles (models).
+from datasets import load_dataset  # This tool helps us get our book, where the puzzles are.
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, AdamW, get_scheduler  # These help us understand and solve puzzles.
+from transformers import DataCollatorWithPadding  # This makes sure all puzzle pieces are the same size.
+from torch.utils.data import DataLoader  # This helps us handle one page of puzzles at a time.
+import torch  # This is like the brain of our operations, helping us think through puzzles.
+from tqdm.auto import tqdm  # This is our progress bar, showing us how far we've come in solving the book.
+import evaluate  # This tells us how well we did in solving puzzles.
+from accelerate import Accelerator  # This makes everything go super fast, like a rocket!
+def train_and_save_model():
+    # Now, let's pick up the book we're going to solve today.
+    raw_datasets = load_dataset("glue", "mrpc")  # This is a book filled with puzzles about matching sentences.
+    # Before we start solving puzzles, we need to understand the language they're written in.
+    checkpoint = "bert-base-uncased"  # This is a guidebook to help us understand the puzzles' language.
+    tokenizer = AutoTokenizer.from_pretrained(checkpoint)  # This tool helps us read and understand the language in our book.
+    # To solve puzzles, we need to make sure we understand each sentence properly.
+    def tokenize_function(example):  # This is like reading each sentence carefully and understanding each word.
+        return tokenizer(example["sentence1"], example["sentence2"], truncation=True)
+    # We prepare all puzzles in the book so they're ready to solve.
+    tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)  # This is like marking all the important parts of the sentences.
+    # Puzzles can be different sizes, but our puzzle solver works best when all puzzles are the same size.
+    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)  # This adds extra paper to smaller puzzles to make them all the same size.
+    # We're setting up our puzzle pages, making sure we're ready to solve them one by one.
+    tokenized_datasets = tokenized_datasets.remove_columns(["sentence1", "sentence2", "idx"])  # We remove stuff we don't need.
+    tokenized_datasets = tokenized_datasets.rename_column("label", "labels")  # We make sure the puzzle answers are labeled correctly.
+    tokenized_datasets.set_format("torch")  # We make sure our puzzles are in the right format for our brain to understand.
+    # Now, we're ready to start solving puzzles, one page at a time.
+    train_dataloader = DataLoader(
+        tokenized_datasets["train"], shuffle=True, batch_size=8, collate_fn=data_collator
+    )  # This is our training puzzles.
+    eval_dataloader = DataLoader(
+        tokenized_datasets["validation"], batch_size=8, collate_fn=data_collator
+    )  # These are puzzles we use to check our progress.
+    # We need a puzzle solver, which is specially trained to solve these types of puzzles.
+    model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)  # This is our puzzle-solving robot.
+    # Our robot needs instructions on how to get better at solving puzzles.
+    optimizer = AdamW(model.parameters(), lr=5e-5)  # This tells our robot how to improve.
+    num_epochs = 3  # This is how many times we'll go through the whole book of puzzles.
+    num_training_steps = num_epochs * len(train_dataloader)  # This is the total number of puzzles we'll solve.
+    lr_scheduler = get_scheduler(
+        "linear",
+        optimizer=optimizer,
+        num_warmup_steps=0,
+        num_training_steps=num_training_steps,
+    )  # This adjusts how quickly our robot learns over time.
+    # To solve puzzles super fast, we're going to use a rocket!
+    accelerator = Accelerator()  # This is our rocket that makes everything go faster.
+    model, optimizer, train_dataloader, eval_dataloader = accelerator.prepare(
+        model, optimizer, train_dataloader, eval_dataloader
+    )  # We make sure our robot, our puzzles, and our instructions are all ready for the rocket.
+    # It's time to start solving puzzles!
+    progress_bar = tqdm(range(num_training_steps))  # This shows us our progress.
+    model.train()  # We tell our robot it's time to start learning.
+    for epoch in range(num_epochs):  # We go through our book of puzzles multiple times to get really good.
+        for batch in train_dataloader:  # Each time, we take a page of puzzles to solve.
+            outputs = model(**batch)  # Our robot tries to solve the puzzles.
+            loss = outputs.loss  # We check how many mistakes it made.
+            accelerator.backward(loss)  # We give feedback to our robot so it can learn from its mistakes.
+            optimizer.step()  # We update our robot's puzzle-solving strategy.
+            lr_scheduler.step()  # We adjust how quickly our robot is learning.
+            optimizer.zero_grad()  # We reset some settings to make sure our robot is ready for the next page.
+            progress_bar.update(1)  # We update our progress bar to show how many puzzles we've solved.
+    # After all that practice, it's time to test how good our robot has become at solving puzzles.
+    metric = evaluate.load("glue", "mrpc")  # This is like the answer key to check our robot's work.
+    model.eval()  # We tell our robot it's time to show what it's learned.
+    for batch in eval_dataloader:  # We take a page of puzzles we haven't solved yet.
+        with torch.no_grad():  # We make sure we're just testing, not learning anymore.
+            outputs = model(**batch)  # Our robot solves the puzzles.
+        logits = outputs.logits  # We look at our robot's answers.
+        predictions = torch.argmax(logits, dim=-1)  # We decide which answer our robot thinks is right.
+        metric.add_batch(predictions=predictions, references=batch["labels"])  # We compare our robot's answers to the correct answers.
+    final_score = metric.compute()  # We calculate how well our robot did.
+    print(final_score)  # We print out the score to see how well our robot solved the puzzles!
+    model.save_pretrained("path/to/save/model")
+    tokenizer.save_pretrained("path/to/save/tokenizer")
+if __name__ == "__main__":
+    train_and_save_model()

.lh/app.py.json CHANGED Viewed

@@ -3,7 +3,7 @@
     "activeCommit": 0,
     "commits": [
         {
-            "activePatchIndex": 2,
             "patches": [
                 {
                     "date": 1708166138917,
@@ -16,6 +16,14 @@
                 {
                     "date": 1708166830798,
                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -31,5 +31,5 @@\n     title=\"Paraphrase Identification\",\n     description=\"This model predicts whether two sentences are paraphrases of each other.\"\n )\n \n-iface.launch()\n\\n+iface.launch()\n"
                 }
             ],
             "date": 1708166138917,

     "activeCommit": 0,
     "commits": [
         {
+            "activePatchIndex": 4,
             "patches": [
                 {
                     "date": 1708166138917,
                 {
                     "date": 1708166830798,
                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -31,5 +31,5 @@\n     title=\"Paraphrase Identification\",\n     description=\"This model predicts whether two sentences are paraphrases of each other.\"\n )\n \n-iface.launch()\n\\n+iface.launch()\n"
+                },
+                {
+                    "date": 1708167302135,
+                    "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,8 +1,10 @@\n import gradio as gr\n from transformers import AutoTokenizer, AutoModelForSequenceClassification\n import torch\n+from trainml import train_and_save_model  # Import the training function\n \n+\n # Load the trained model and tokenizer\n model_path = \"path/to/save/model\"\n tokenizer_path = \"path/to/save/tokenizer\"\n \n"
+                },
+                {
+                    "date": 1708167312025,
+                    "content": "Index: \n===================================================================\n--- \n+++ \n@@ -1,10 +1,10 @@\n import gradio as gr\n from transformers import AutoTokenizer, AutoModelForSequenceClassification\n import torch\n from trainml import train_and_save_model  # Import the training function\n+train_and_save_model()\n \n-\n # Load the trained model and tokenizer\n model_path = \"path/to/save/model\"\n tokenizer_path = \"path/to/save/tokenizer\"\n \n"
                 }
             ],
             "date": 1708166138917,

.lh/trainml.py.json CHANGED Viewed

@@ -3,7 +3,7 @@
     "activeCommit": 0,
     "commits": [
         {
-            "activePatchIndex": 1,
             "patches": [
                 {
                     "date": 1708166375103,
@@ -12,6 +12,22 @@
                 {
                     "date": 1708166792627,
                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -83,4 +83,7 @@\n     metric.add_batch(predictions=predictions, references=batch[\"labels\"])  # We compare our robot's answers to the correct answers.\n \n final_score = metric.compute()  # We calculate how well our robot did.\n print(final_score)  # We print out the score to see how well our robot solved the puzzles!\n+\n+model.save_pretrained(\"path/to/save/model\")\n+tokenizer.save_pretrained(\"path/to/save/tokenizer\")\n\\n"
                 }
             ],
             "date": 1708166375103,

     "activeCommit": 0,
     "commits": [
         {
+            "activePatchIndex": 5,
             "patches": [
                 {
                     "date": 1708166375103,
                 {
                     "date": 1708166792627,
                     "content": "Index: \n===================================================================\n--- \n+++ \n@@ -83,4 +83,7 @@\n     metric.add_batch(predictions=predictions, references=batch[\"labels\"])  # We compare our robot's answers to the correct answers.\n \n final_score = metric.compute()  # We calculate how well our robot did.\n print(final_score)  # We print out the score to see how well our robot solved the puzzles!\n+\n+model.save_pretrained(\"path/to/save/model\")\n+tokenizer.save_pretrained(\"path/to/save/tokenizer\")\n\\n"
+                },
+                {
+                    "date": 1708167245700,
+                    "content": "Index: \n===================================================================\n--- \n+++ \n@@ -8,8 +8,9 @@\n from tqdm.auto import tqdm  # This is our progress bar, showing us how far we've come in solving the book.\n import evaluate  # This tells us how well we did in solving puzzles.\n from accelerate import Accelerator  # This makes everything go super fast, like a rocket!\n \n+def train_and_save_model():\n # Now, let's pick up the book we're going to solve today.\n raw_datasets = load_dataset(\"glue\", \"mrpc\")  # This is a book filled with puzzles about matching sentences.\n \n # Before we start solving puzzles, we need to understand the language they're written in.\n"
+                },
+                {
+                    "date": 1708167252023,
+                    "content": "Index: \n===================================================================\n--- \n+++ \n@@ -86,5 +86,5 @@\n final_score = metric.compute()  # We calculate how well our robot did.\n print(final_score)  # We print out the score to see how well our robot solved the puzzles!\n \n model.save_pretrained(\"path/to/save/model\")\n-tokenizer.save_pretrained(\"path/to/save/tokenizer\")\n\\n+tokenizer.save_pretrained(\"path/to/save/tokenizer\")\n"
+                },
+                {
+                    "date": 1708167259383,
+                    "content": "Index: \n===================================================================\n--- \n+++ \n@@ -9,82 +9,82 @@\n import evaluate  # This tells us how well we did in solving puzzles.\n from accelerate import Accelerator  # This makes everything go super fast, like a rocket!\n \n def train_and_save_model():\n-# Now, let's pick up the book we're going to solve today.\n-raw_datasets = load_dataset(\"glue\", \"mrpc\")  # This is a book filled with puzzles about matching sentences.\n+    # Now, let's pick up the book we're going to solve today.\n+    raw_datasets = load_dataset(\"glue\", \"mrpc\")  # This is a book filled with puzzles about matching sentences.\n \n-# Before we start solving puzzles, we need to understand the language they're written in.\n-checkpoint = \"bert-base-uncased\"  # This is a guidebook to help us understand the puzzles' language.\n-tokenizer = AutoTokenizer.from_pretrained(checkpoint)  # This tool helps us read and understand the language in our book.\n+    # Before we start solving puzzles, we need to understand the language they're written in.\n+    checkpoint = \"bert-base-uncased\"  # This is a guidebook to help us understand the puzzles' language.\n+    tokenizer = AutoTokenizer.from_pretrained(checkpoint)  # This tool helps us read and understand the language in our book.\n \n-# To solve puzzles, we need to make sure we understand each sentence properly.\n-def tokenize_function(example):  # This is like reading each sentence carefully and understanding each word.\n-    return tokenizer(example[\"sentence1\"], example[\"sentence2\"], truncation=True)\n+    # To solve puzzles, we need to make sure we understand each sentence properly.\n+    def tokenize_function(example):  # This is like reading each sentence carefully and understanding each word.\n+        return tokenizer(example[\"sentence1\"], example[\"sentence2\"], truncation=True)\n \n-# We prepare all puzzles in the book so they're ready to 
solve.\n-tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)  # This is like marking all the important parts of the sentences.\n+    # We prepare all puzzles in the book so they're ready to solve.\n+    tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)  # This is like marking all the important parts of the sentences.\n \n-# Puzzles can be different sizes, but our puzzle solver works best when all puzzles are the same size.\n-data_collator = DataCollatorWithPadding(tokenizer=tokenizer)  # This adds extra paper to smaller puzzles to make them all the same size.\n+    # Puzzles can be different sizes, but our puzzle solver works best when all puzzles are the same size.\n+    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)  # This adds extra paper to smaller puzzles to make them all the same size.\n \n-# We're setting up our puzzle pages, making sure we're ready to solve them one by one.\n-tokenized_datasets = tokenized_datasets.remove_columns([\"sentence1\", \"sentence2\", \"idx\"])  # We remove stuff we don't need.\n-tokenized_datasets = tokenized_datasets.rename_column(\"label\", \"labels\")  # We make sure the puzzle answers are labeled correctly.\n-tokenized_datasets.set_format(\"torch\")  # We make sure our puzzles are in the right format for our brain to understand.\n+    # We're setting up our puzzle pages, making sure we're ready to solve them one by one.\n+    tokenized_datasets = tokenized_datasets.remove_columns([\"sentence1\", \"sentence2\", \"idx\"])  # We remove stuff we don't need.\n+    tokenized_datasets = tokenized_datasets.rename_column(\"label\", \"labels\")  # We make sure the puzzle answers are labeled correctly.\n+    tokenized_datasets.set_format(\"torch\")  # We make sure our puzzles are in the right format for our brain to understand.\n \n-# Now, we're ready to start solving puzzles, one page at a time.\n-train_dataloader = DataLoader(\n-    tokenized_datasets[\"train\"], shuffle=True, 
batch_size=8, collate_fn=data_collator\n-)  # This is our training puzzles.\n-eval_dataloader = DataLoader(\n-    tokenized_datasets[\"validation\"], batch_size=8, collate_fn=data_collator\n-)  # These are puzzles we use to check our progress.\n+    # Now, we're ready to start solving puzzles, one page at a time.\n+    train_dataloader = DataLoader(\n+        tokenized_datasets[\"train\"], shuffle=True, batch_size=8, collate_fn=data_collator\n+    )  # This is our training puzzles.\n+    eval_dataloader = DataLoader(\n+        tokenized_datasets[\"validation\"], batch_size=8, collate_fn=data_collator\n+    )  # These are puzzles we use to check our progress.\n \n-# We need a puzzle solver, which is specially trained to solve these types of puzzles.\n-model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)  # This is our puzzle-solving robot.\n+    # We need a puzzle solver, which is specially trained to solve these types of puzzles.\n+    model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)  # This is our puzzle-solving robot.\n \n-# Our robot needs instructions on how to get better at solving puzzles.\n-optimizer = AdamW(model.parameters(), lr=5e-5)  # This tells our robot how to improve.\n-num_epochs = 3  # This is how many times we'll go through the whole book of puzzles.\n-num_training_steps = num_epochs * len(train_dataloader)  # This is the total number of puzzles we'll solve.\n-lr_scheduler = get_scheduler(\n-    \"linear\",\n-    optimizer=optimizer,\n-    num_warmup_steps=0,\n-    num_training_steps=num_training_steps,\n-)  # This adjusts how quickly our robot learns over time.\n+    # Our robot needs instructions on how to get better at solving puzzles.\n+    optimizer = AdamW(model.parameters(), lr=5e-5)  # This tells our robot how to improve.\n+    num_epochs = 3  # This is how many times we'll go through the whole book of puzzles.\n+    num_training_steps = num_epochs * len(train_dataloader)  
# This is the total number of puzzles we'll solve.\n+    lr_scheduler = get_scheduler(\n+        \"linear\",\n+        optimizer=optimizer,\n+        num_warmup_steps=0,\n+        num_training_steps=num_training_steps,\n+    )  # This adjusts how quickly our robot learns over time.\n \n-# To solve puzzles super fast, we're going to use a rocket!\n-accelerator = Accelerator()  # This is our rocket that makes everything go faster.\n-model, optimizer, train_dataloader, eval_dataloader = accelerator.prepare(\n-    model, optimizer, train_dataloader, eval_dataloader\n-)  # We make sure our robot, our puzzles, and our instructions are all ready for the rocket.\n+    # To solve puzzles super fast, we're going to use a rocket!\n+    accelerator = Accelerator()  # This is our rocket that makes everything go faster.\n+    model, optimizer, train_dataloader, eval_dataloader = accelerator.prepare(\n+        model, optimizer, train_dataloader, eval_dataloader\n+    )  # We make sure our robot, our puzzles, and our instructions are all ready for the rocket.\n \n-# It's time to start solving puzzles!\n-progress_bar = tqdm(range(num_training_steps))  # This shows us our progress.\n-model.train()  # We tell our robot it's time to start learning.\n-for epoch in range(num_epochs):  # We go through our book of puzzles multiple times to get really good.\n-    for batch in train_dataloader:  # Each time, we take a page of puzzles to solve.\n-        outputs = model(**batch)  # Our robot tries to solve the puzzles.\n-        loss = outputs.loss  # We check how many mistakes it made.\n-        accelerator.backward(loss)  # We give feedback to our robot so it can learn from its mistakes.\n-        optimizer.step()  # We update our robot's puzzle-solving strategy.\n-        lr_scheduler.step()  # We adjust how quickly our robot is learning.\n-        optimizer.zero_grad()  # We reset some settings to make sure our robot is ready for the next page.\n-        progress_bar.update(1)  # We 
update our progress bar to show how many puzzles we've solved.\n+    # It's time to start solving puzzles!\n+    progress_bar = tqdm(range(num_training_steps))  # This shows us our progress.\n+    model.train()  # We tell our robot it's time to start learning.\n+    for epoch in range(num_epochs):  # We go through our book of puzzles multiple times to get really good.\n+        for batch in train_dataloader:  # Each time, we take a page of puzzles to solve.\n+            outputs = model(**batch)  # Our robot tries to solve the puzzles.\n+            loss = outputs.loss  # We check how many mistakes it made.\n+            accelerator.backward(loss)  # We give feedback to our robot so it can learn from its mistakes.\n+            optimizer.step()  # We update our robot's puzzle-solving strategy.\n+            lr_scheduler.step()  # We adjust how quickly our robot is learning.\n+            optimizer.zero_grad()  # We reset some settings to make sure our robot is ready for the next page.\n+            progress_bar.update(1)  # We update our progress bar to show how many puzzles we've solved.\n \n-# After all that practice, it's time to test how good our robot has become at solving puzzles.\n-metric = evaluate.load(\"glue\", \"mrpc\")  # This is like the answer key to check our robot's work.\n-model.eval()  # We tell our robot it's time to show what it's learned.\n-for batch in eval_dataloader:  # We take a page of puzzles we haven't solved yet.\n-    with torch.no_grad():  # We make sure we're just testing, not learning anymore.\n-        outputs = model(**batch)  # Our robot solves the puzzles.\n-    logits = outputs.logits  # We look at our robot's answers.\n-    predictions = torch.argmax(logits, dim=-1)  # We decide which answer our robot thinks is right.\n-    metric.add_batch(predictions=predictions, references=batch[\"labels\"])  # We compare our robot's answers to the correct answers.\n+    # After all that practice, it's time to test how good our robot has 
become at solving puzzles.\n+    metric = evaluate.load(\"glue\", \"mrpc\")  # This is like the answer key to check our robot's work.\n+    model.eval()  # We tell our robot it's time to show what it's learned.\n+    for batch in eval_dataloader:  # We take a page of puzzles we haven't solved yet.\n+        with torch.no_grad():  # We make sure we're just testing, not learning anymore.\n+            outputs = model(**batch)  # Our robot solves the puzzles.\n+        logits = outputs.logits  # We look at our robot's answers.\n+        predictions = torch.argmax(logits, dim=-1)  # We decide which answer our robot thinks is right.\n+        metric.add_batch(predictions=predictions, references=batch[\"labels\"])  # We compare our robot's answers to the correct answers.\n \n-final_score = metric.compute()  # We calculate how well our robot did.\n-print(final_score)  # We print out the score to see how well our robot solved the puzzles!\n+    final_score = metric.compute()  # We calculate how well our robot did.\n+    print(final_score)  # We print out the score to see how well our robot solved the puzzles!\n \n-model.save_pretrained(\"path/to/save/model\")\n-tokenizer.save_pretrained(\"path/to/save/tokenizer\")\n+    model.save_pretrained(\"path/to/save/model\")\n+    tokenizer.save_pretrained(\"path/to/save/tokenizer\")\n"
+                },
+                {
+                    "date": 1708167281057,
+                    "content": "Index: \n===================================================================\n--- \n+++ \n@@ -87,4 +87,8 @@\n     print(final_score)  # We print out the score to see how well our robot solved the puzzles!\n \n     model.save_pretrained(\"path/to/save/model\")\n     tokenizer.save_pretrained(\"path/to/save/tokenizer\")\n+\n+if __name__ == \"__main__\":\n+    train_and_save_model()\n+\n"
                 }
             ],
             "date": 1708166375103,

app.py CHANGED Viewed

@@ -1,6 +1,8 @@
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
 # Load the trained model and tokenizer
 model_path = "path/to/save/model"

 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
+from trainml import train_and_save_model  # Import the training function
+train_and_save_model()
 # Load the trained model and tokenizer
 model_path = "path/to/save/model"

trainml.py CHANGED Viewed

@@ -9,81 +9,86 @@ from tqdm.auto import tqdm  # This is our progress bar, showing us how far we've
 import evaluate  # This tells us how well we did in solving puzzles.
 from accelerate import Accelerator  # This makes everything go super fast, like a rocket!
-# Now, let's pick up the book we're going to solve today.
-raw_datasets = load_dataset("glue", "mrpc")  # This is a book filled with puzzles about matching sentences.
-# Before we start solving puzzles, we need to understand the language they're written in.
-checkpoint = "bert-base-uncased"  # This is a guidebook to help us understand the puzzles' language.
-tokenizer = AutoTokenizer.from_pretrained(checkpoint)  # This tool helps us read and understand the language in our book.
-# To solve puzzles, we need to make sure we understand each sentence properly.
-def tokenize_function(example):  # This is like reading each sentence carefully and understanding each word.
-    return tokenizer(example["sentence1"], example["sentence2"], truncation=True)
-# We prepare all puzzles in the book so they're ready to solve.
-tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)  # This is like marking all the important parts of the sentences.
-# Puzzles can be different sizes, but our puzzle solver works best when all puzzles are the same size.
-data_collator = DataCollatorWithPadding(tokenizer=tokenizer)  # This adds extra paper to smaller puzzles to make them all the same size.
-# We're setting up our puzzle pages, making sure we're ready to solve them one by one.
-tokenized_datasets = tokenized_datasets.remove_columns(["sentence1", "sentence2", "idx"])  # We remove stuff we don't need.
-tokenized_datasets = tokenized_datasets.rename_column("label", "labels")  # We make sure the puzzle answers are labeled correctly.
-tokenized_datasets.set_format("torch")  # We make sure our puzzles are in the right format for our brain to understand.
-# Now, we're ready to start solving puzzles, one page at a time.
-train_dataloader = DataLoader(
-    tokenized_datasets["train"], shuffle=True, batch_size=8, collate_fn=data_collator
-)  # This is our training puzzles.
-eval_dataloader = DataLoader(
-    tokenized_datasets["validation"], batch_size=8, collate_fn=data_collator
-)  # These are puzzles we use to check our progress.
-# We need a puzzle solver, which is specially trained to solve these types of puzzles.
-model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)  # This is our puzzle-solving robot.
-# Our robot needs instructions on how to get better at solving puzzles.
-optimizer = AdamW(model.parameters(), lr=5e-5)  # This tells our robot how to improve.
-num_epochs = 3  # This is how many times we'll go through the whole book of puzzles.
-num_training_steps = num_epochs * len(train_dataloader)  # This is the total number of puzzles we'll solve.
-lr_scheduler = get_scheduler(
-    "linear",
-    optimizer=optimizer,
-    num_warmup_steps=0,
-    num_training_steps=num_training_steps,
-)  # This adjusts how quickly our robot learns over time.
-# To solve puzzles super fast, we're going to use a rocket!
-accelerator = Accelerator()  # This is our rocket that makes everything go faster.
-model, optimizer, train_dataloader, eval_dataloader = accelerator.prepare(
-    model, optimizer, train_dataloader, eval_dataloader
-)  # We make sure our robot, our puzzles, and our instructions are all ready for the rocket.
-# It's time to start solving puzzles!
-progress_bar = tqdm(range(num_training_steps))  # This shows us our progress.
-model.train()  # We tell our robot it's time to start learning.
-for epoch in range(num_epochs):  # We go through our book of puzzles multiple times to get really good.
-    for batch in train_dataloader:  # Each time, we take a page of puzzles to solve.
-        outputs = model(**batch)  # Our robot tries to solve the puzzles.
-        loss = outputs.loss  # We check how many mistakes it made.
-        accelerator.backward(loss)  # We give feedback to our robot so it can learn from its mistakes.
-        optimizer.step()  # We update our robot's puzzle-solving strategy.
-        lr_scheduler.step()  # We adjust how quickly our robot is learning.
-        optimizer.zero_grad()  # We reset some settings to make sure our robot is ready for the next page.
-        progress_bar.update(1)  # We update our progress bar to show how many puzzles we've solved.
-# After all that practice, it's time to test how good our robot has become at solving puzzles.
-metric = evaluate.load("glue", "mrpc")  # This is like the answer key to check our robot's work.
-model.eval()  # We tell our robot it's time to show what it's learned.
-for batch in eval_dataloader:  # We take a page of puzzles we haven't solved yet.
-    with torch.no_grad():  # We make sure we're just testing, not learning anymore.
-        outputs = model(**batch)  # Our robot solves the puzzles.
-    logits = outputs.logits  # We look at our robot's answers.
-    predictions = torch.argmax(logits, dim=-1)  # We decide which answer our robot thinks is right.
-    metric.add_batch(predictions=predictions, references=batch["labels"])  # We compare our robot's answers to the correct answers.
-final_score = metric.compute()  # We calculate how well our robot did.
-print(final_score)  # We print out the score to see how well our robot solved the puzzles!
-model.save_pretrained("path/to/save/model")
-tokenizer.save_pretrained("path/to/save/tokenizer")

 import evaluate  # This tells us how well we did in solving puzzles.
 from accelerate import Accelerator  # This makes everything go super fast, like a rocket!
+def train_and_save_model(
+    model_dir="path/to/save/model",
+    tokenizer_dir="path/to/save/tokenizer",
+    num_epochs=3,
+):
+    """Fine-tune ``bert-base-uncased`` on GLUE MRPC, evaluate it, and save it.
+
+    The defaults reproduce the original hard-coded behaviour, so an existing
+    ``train_and_save_model()`` call keeps writing to the same locations.
+
+    Args:
+        model_dir: Directory the fine-tuned model is written to.
+        tokenizer_dir: Directory the tokenizer is written to.
+        num_epochs: Number of full passes over the training split.
+
+    Returns:
+        The value returned by the GLUE/MRPC metric's ``compute()`` on the
+        validation split (it is also printed).
+    """
+    # Load the sentence-pair dataset and the tokenizer matching the checkpoint.
+    raw_datasets = load_dataset("glue", "mrpc")
+    checkpoint = "bert-base-uncased"
+    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+
+    def tokenize_function(example):
+        # Encode both sentences as one pair; padding is deferred to the collator.
+        return tokenizer(example["sentence1"], example["sentence2"], truncation=True)
+
+    tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)
+    # Pads each batch to the length of its longest example.
+    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
+
+    # Keep only the columns the model's forward() accepts, as torch tensors.
+    tokenized_datasets = tokenized_datasets.remove_columns(["sentence1", "sentence2", "idx"])
+    tokenized_datasets = tokenized_datasets.rename_column("label", "labels")
+    tokenized_datasets.set_format("torch")
+
+    train_dataloader = DataLoader(
+        tokenized_datasets["train"], shuffle=True, batch_size=8, collate_fn=data_collator
+    )
+    eval_dataloader = DataLoader(
+        tokenized_datasets["validation"], batch_size=8, collate_fn=data_collator
+    )
+
+    model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)
+    optimizer = AdamW(model.parameters(), lr=5e-5)
+
+    # Let Accelerate place the model/optimizer and wrap the dataloaders.
+    accelerator = Accelerator()
+    model, optimizer, train_dataloader, eval_dataloader = accelerator.prepare(
+        model, optimizer, train_dataloader, eval_dataloader
+    )
+
+    # Size the schedule from the *prepared* dataloader: when launched on several
+    # processes its length is the per-process share, not the original length.
+    num_training_steps = num_epochs * len(train_dataloader)
+    lr_scheduler = get_scheduler(
+        "linear",
+        optimizer=optimizer,
+        num_warmup_steps=0,
+        num_training_steps=num_training_steps,
+    )
+
+    # Show a single progress bar per machine rather than one per process.
+    progress_bar = tqdm(
+        range(num_training_steps), disable=not accelerator.is_local_main_process
+    )
+    model.train()
+    for _ in range(num_epochs):
+        for batch in train_dataloader:
+            outputs = model(**batch)
+            loss = outputs.loss
+            # Use the accelerator's backward so any gradient scaling is applied.
+            accelerator.backward(loss)
+            optimizer.step()
+            lr_scheduler.step()
+            optimizer.zero_grad()
+            progress_bar.update(1)
+
+    # Score the fine-tuned model on the validation split.
+    metric = evaluate.load("glue", "mrpc")
+    model.eval()
+    for batch in eval_dataloader:
+        with torch.no_grad():
+            outputs = model(**batch)
+        predictions = torch.argmax(outputs.logits, dim=-1)
+        # Collect results from every process (a no-op when there is only one).
+        predictions, references = accelerator.gather_for_metrics(
+            (predictions, batch["labels"])
+        )
+        metric.add_batch(predictions=predictions, references=references)
+
+    final_score = metric.compute()
+    accelerator.print(final_score)  # Printed once, not once per process.
+
+    # Save the underlying model (without Accelerate's wrapper), and write files
+    # from the main process only so parallel workers do not clobber each other.
+    accelerator.wait_for_everyone()
+    unwrapped_model = accelerator.unwrap_model(model)
+    unwrapped_model.save_pretrained(
+        model_dir,
+        is_main_process=accelerator.is_main_process,
+        save_function=accelerator.save,
+    )
+    if accelerator.is_main_process:
+        tokenizer.save_pretrained(tokenizer_dir)
+    return final_score
+if __name__ == "__main__":
+    train_and_save_model()