# HW3 / test.py
# Author: ArnaudVH
# Commit: Final Train and Test files (dea7034)
from transformers import pipeline, AutoTokenizer
from split_data import make_test_data
import torch
from torchmetrics.classification import MulticlassConfusionMatrix
# Evaluation script: classify every test example with the fine-tuned
# sentiment model and print a 2x2 confusion matrix.

# Maps the pipeline's string labels to the integer class ids used for the
# confusion matrix.
# NOTE(review): assumes the fine-tuned model emits exactly "POSITIVE" /
# "NEGATIVE"; any other label string raises KeyError below -- confirm against
# the model's id2label config.
label2id = {
    "POSITIVE": 1,
    "NEGATIVE": 0,
}
# Same mapping under the second name this script has always exposed.
label_to_id = label2id

# Load tokenizer and classifier with truncation
tokenizer = AutoTokenizer.from_pretrained("./finetuned")
classifier = pipeline(
    "sentiment-analysis",
    model="./finetuned",
    tokenizer=tokenizer,
    max_length=512,
    truncation=True,
)

test_data = make_test_data()
texts = test_data["text"]
# NOTE(review): assumes these are integer ids consistent with label2id
# (0 = NEGATIVE, 1 = POSITIVE) -- confirm against make_test_data.
true_labels = test_data["label"]

# Run inference once over the whole test set. Previously this batch result
# was discarded and every text was classified a second time in a loop.
results = classifier(texts)

# Convert predicted labels to numerical format
predicted_labels = [label_to_id[result["label"]] for result in results]

predicted_tensor = torch.tensor(predicted_labels)
true_tensor = torch.tensor(true_labels)

# Get the confusion matrix using Torch Metrics; arguments are (preds, target).
confusion_matrix = MulticlassConfusionMatrix(num_classes=2)(predicted_tensor, true_tensor)
print("Confusion Matrix")
print(confusion_matrix)