---
library_name: transformers
tags: []
---
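The snippet below loads `CIS5190-PROJ/BERTv3` (a fine-tuned `BertForSequenceClassification` checkpoint) and runs inference over the `title` column of a pandas DataFrame, returning one predicted label per row; the final `1 - predictions` step flips the class indices to match the expected 0/1 label convention.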
|
```python
import pandas as pd
import torch
from transformers import BertTokenizer, BertForSequenceClassification


def evaluate(test_data: pd.DataFrame):
    # Load the fine-tuned tokenizer and classifier from the Hub.
    tokenizer = BertTokenizer.from_pretrained("CIS5190-PROJ/BERTv3")
    model = BertForSequenceClassification.from_pretrained("CIS5190-PROJ/BERTv3")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    # Tokenize the 'title' column and move the encodings to the target device.
    test_texts = test_data["title"].tolist()
    test_encodings = tokenizer(
        test_texts, truncation=True, padding="max_length", max_length=64
    )
    test_encodings = {key: torch.tensor(val).to(device) for key, val in test_encodings.items()}

    # Run inference without tracking gradients.
    with torch.no_grad():
        outputs = model(**test_encodings)
        logits = outputs.logits

    predictions = torch.argmax(logits, dim=1).cpu().numpy()

    # Flip the predicted class indices (0 <-> 1) to match the expected label convention.
    return 1 - predictions
```
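For reference, a minimal call to `evaluate` might look like the sketch below. The example titles and the two-row DataFrame are illustrative assumptions, not data from the original card; the only requirement implied by the code above is a DataFrame with a `title` column.

```python
import pandas as pd

# Hypothetical input: a small DataFrame with a 'title' column, as expected by evaluate().
sample = pd.DataFrame({
    "title": [
        "Example headline one",  # illustrative text only
        "Example headline two",
    ]
})

preds = evaluate(sample)  # numpy array with one 0/1 label per row
print(preds)
```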