amelkhoadry
/

optimized-bert-model

Safetensors

bert

Model card Files Files and versions

xet

Community

amelkhoadry committed on Oct 31, 2025

Commit

a62de9c

verified ·

1 Parent(s): 031045b

Update README.md

Browse files

Files changed (1) hide show

README.md +74 -41

README.md CHANGED Viewed

@@ -1,49 +1,82 @@
----
-license: mit
----
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
-from datasets import load_dataset
 import torch
-# Load model and tokenizer from Hugging Face Hub
-repo_id = "amelkhoadry/optimized-bert-model"
-model = AutoModelForSequenceClassification.from_pretrained(repo_id)
-tokenizer = AutoTokenizer.from_pretrained(repo_id)
-# Load your test dataset (replace 'your_dataset'/'test' with your actual dataset and split)
-# Example uses the 'imdb' dataset as a placeholder. Replace as needed.
-test_dataset = load_dataset("imdb", split="test")
-# Preprocess test data using the tokenizer
-def preprocess(example):
-    return tokenizer(
-        example["text"],
-        truncation=True,
-        padding='max_length',
-        max_length=128
-    )
-test_dataset = test_dataset.map(preprocess, batched=True)
-test_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])
-# Predict on test dataset
-model.eval()
-predictions = []
-labels = []
 with torch.no_grad():
-    for batch in torch.utils.data.DataLoader(test_dataset, batch_size=32):
-        input_ids = batch["input_ids"]
-        attention_mask = batch["attention_mask"]
-        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
-        logits = outputs.logits
-        preds = torch.argmax(logits, dim=1)
-        predictions.extend(preds.tolist())
-        labels.extend(batch["label"].tolist())
-# Example: print the first 10 predictions and labels
-print("First 10 predictions:", predictions[:10])
-print("First 10 actual labels:", labels[:10])
-# If you want to compute accuracy:
-accuracy = sum([int(p == l) for p, l in zip(predictions, labels)]) / len(labels)
-print(f"Test Accuracy: {accuracy:.4f}")

+# ============================================================
+# SIMPLE EXAMPLE: How to Use Your Trained Model
+# ============================================================
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
+# Step 1: Load the model and tokenizer from the local directory
+# (This assumes you ran notebook Cell 18 earlier to save the model to this directory)
+model_path = "optimized-bert-model"
+model = AutoModelForSequenceClassification.from_pretrained(model_path)
+tokenizer = AutoTokenizer.from_pretrained(model_path)
+# Step 2: Put model in evaluation mode
+model.eval()
+# Step 3: Test on a simple example
+# The model was trained on MRPC (paraphrase detection task)
+# It takes two sentences and predicts if they are paraphrases (1) or not (0)
+# Example 1: Two sentences that ARE paraphrases
+sentence1 = "The cat is sleeping on the mat"
+sentence2 = "The cat is napping on the mat"
+# Tokenize the sentences
+inputs = tokenizer(sentence1, sentence2, return_tensors="pt",
+                   truncation=True, padding=True, max_length=128)
+# Step 4: Make prediction
+with torch.no_grad():
+    outputs = model(**inputs)
+    logits = outputs.logits
+    prediction = torch.argmax(logits, dim=1).item()
+print("="*60)
+print("EXAMPLE 1 - Are these paraphrases?")
+print("="*60)
+print(f"Sentence 1: {sentence1}")
+print(f"Sentence 2: {sentence2}")
+print(f"Prediction: {'YES (paraphrases)' if prediction == 1 else 'NO (not paraphrases)'}")
+print(f"Confidence: {torch.softmax(logits, dim=1)[0].max().item():.4f}")
+print()
+# Example 2: Two sentences that are NOT paraphrases
+sentence1 = "The dog is barking loudly"
+sentence2 = "I love eating pizza"
+inputs = tokenizer(sentence1, sentence2, return_tensors="pt",
+                   truncation=True, padding=True, max_length=128)
 with torch.no_grad():
+    outputs = model(**inputs)
+    logits = outputs.logits
+    prediction = torch.argmax(logits, dim=1).item()
+print("="*60)
+print("EXAMPLE 2 - Are these paraphrases?")
+print("="*60)
+print(f"Sentence 1: {sentence1}")
+print(f"Sentence 2: {sentence2}")
+print(f"Prediction: {'YES (paraphrases)' if prediction == 1 else 'NO (not paraphrases)'}")
+print(f"Confidence: {torch.softmax(logits, dim=1)[0].max().item():.4f}")
+print("="*60)
+============================================================
+EXAMPLE 1 - Are these paraphrases?
+============================================================
+Sentence 1: The cat is sleeping on the mat
+Sentence 2: The cat is napping on the mat
+Prediction: YES (paraphrases)
+Confidence: 0.9998
+============================================================
+EXAMPLE 2 - Are these paraphrases?
+============================================================
+Sentence 1: The dog is barking loudly
+Sentence 2: I love eating pizza
+Prediction: NO (not paraphrases)
+Confidence: 0.9584
+============================================================