amelkhoadry committed on
Commit
a62de9c
·
verified ·
1 Parent(s): 031045b

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +74 -41
README.md CHANGED
@@ -1,49 +1,82 @@
1
- ---
2
- license: mit
3
- ---
 
4
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
5
- from datasets import load_dataset
6
  import torch
7
 
8
- # Load model and tokenizer from Hugging Face Hub
9
- repo_id = "amelkhoadry/optimized-bert-model"
10
- model = AutoModelForSequenceClassification.from_pretrained(repo_id)
11
- tokenizer = AutoTokenizer.from_pretrained(repo_id)
 
 
 
 
12
 
13
- # Load your test dataset (replace 'your_dataset'/'test' with your actual dataset and split)
14
- # Example uses the 'imdb' dataset as a placeholder. Replace as needed.
15
- test_dataset = load_dataset("imdb", split="test")
16
 
17
- # Preprocess test data using the tokenizer
18
- def preprocess(example):
19
- return tokenizer(
20
- example["text"],
21
- truncation=True,
22
- padding='max_length',
23
- max_length=128
24
- )
25
 
26
- test_dataset = test_dataset.map(preprocess, batched=True)
27
- test_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
- # Predict on test dataset
30
- model.eval()
31
- predictions = []
32
- labels = []
33
  with torch.no_grad():
34
- for batch in torch.utils.data.DataLoader(test_dataset, batch_size=32):
35
- input_ids = batch["input_ids"]
36
- attention_mask = batch["attention_mask"]
37
- outputs = model(input_ids=input_ids, attention_mask=attention_mask)
38
- logits = outputs.logits
39
- preds = torch.argmax(logits, dim=1)
40
- predictions.extend(preds.tolist())
41
- labels.extend(batch["label"].tolist())
42
-
43
- # Example: print the first 10 predictions and labels
44
- print("First 10 predictions:", predictions[:10])
45
- print("First 10 actual labels:", labels[:10])
46
-
47
- # If you want to compute accuracy:
48
- accuracy = sum([int(p == l) for p, l in zip(predictions, labels)]) / len(labels)
49
- print(f"Test Accuracy: {accuracy:.4f}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ============================================================
2
+ # SIMPLE EXAMPLE: How to Use Your Trained Model
3
+ # ============================================================
4
+
5
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
 
6
  import torch
7
 
8
+ # Step 1: Load the model and tokenizer from the local directory
9
+ # (This assumes the model was already saved to this directory, e.g. by running Cell 18 of the accompanying notebook)
10
+ model_path = "optimized-bert-model"
11
+ model = AutoModelForSequenceClassification.from_pretrained(model_path)
12
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
13
+
14
+ # Step 2: Put model in evaluation mode
15
+ model.eval()
16
 
17
+ # Step 3: Test on a simple example
18
+ # The model was trained on MRPC (paraphrase detection task)
19
+ # It takes two sentences and predicts if they are paraphrases (1) or not (0)
20
 
21
+ # Example 1: Two sentences that ARE paraphrases
22
+ sentence1 = "The cat is sleeping on the mat"
23
+ sentence2 = "The cat is napping on the mat"
 
 
 
 
 
24
 
25
+ # Tokenize the sentences
26
+ inputs = tokenizer(sentence1, sentence2, return_tensors="pt",
27
+ truncation=True, padding=True, max_length=128)
28
+
29
+ # Step 4: Make prediction
30
+ with torch.no_grad():
31
+ outputs = model(**inputs)
32
+ logits = outputs.logits
33
+ prediction = torch.argmax(logits, dim=1).item()
34
+
35
+ print("="*60)
36
+ print("EXAMPLE 1 - Are these paraphrases?")
37
+ print("="*60)
38
+ print(f"Sentence 1: {sentence1}")
39
+ print(f"Sentence 2: {sentence2}")
40
+ print(f"Prediction: {'YES (paraphrases)' if prediction == 1 else 'NO (not paraphrases)'}")
41
+ print(f"Confidence: {torch.softmax(logits, dim=1)[0].max().item():.4f}")
42
+ print()
43
+
44
+ # Example 2: Two sentences that are NOT paraphrases
45
+ sentence1 = "The dog is barking loudly"
46
+ sentence2 = "I love eating pizza"
47
+
48
+ inputs = tokenizer(sentence1, sentence2, return_tensors="pt",
49
+ truncation=True, padding=True, max_length=128)
50
 
 
 
 
 
51
  with torch.no_grad():
52
+ outputs = model(**inputs)
53
+ logits = outputs.logits
54
+ prediction = torch.argmax(logits, dim=1).item()
55
+
56
+ print("="*60)
57
+ print("EXAMPLE 2 - Are these paraphrases?")
58
+ print("="*60)
59
+ print(f"Sentence 1: {sentence1}")
60
+ print(f"Sentence 2: {sentence2}")
61
+ print(f"Prediction: {'YES (paraphrases)' if prediction == 1 else 'NO (not paraphrases)'}")
62
+ print(f"Confidence: {torch.softmax(logits, dim=1)[0].max().item():.4f}")
63
+ print("="*60)
64
+
65
+
66
+ # Example output of the script above:
67
+ ============================================================
68
+ EXAMPLE 1 - Are these paraphrases?
69
+ ============================================================
70
+ Sentence 1: The cat is sleeping on the mat
71
+ Sentence 2: The cat is napping on the mat
72
+ Prediction: YES (paraphrases)
73
+ Confidence: 0.9998
74
+
75
+ ============================================================
76
+ EXAMPLE 2 - Are these paraphrases?
77
+ ============================================================
78
+ Sentence 1: The dog is barking loudly
79
+ Sentence 2: I love eating pizza
80
+ Prediction: NO (not paraphrases)
81
+ Confidence: 0.9584
82
+ ============================================================