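"""Inference module for a BERT-based sentiment classifier.

Loads a fine-tuned BertModel checkpoint and predicts one of six
sentiment classes for each input text. Hyperparameters (pretrained
model name, max sequence length, batch size, dropout, checkpoint
path) are read from config.params.
"""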
from transformers import BertModel, BertTokenizer
from torch.utils.data import Dataset, DataLoader
from torch import nn
import numpy as np
import torch

from config import params

# Run inference on the first GPU when available, otherwise on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
class TextDataset(Dataset):
    """Wraps raw texts and tokenizes them on the fly for BERT.

    Renamed from ``Dataset`` so it no longer shadows the
    ``torch.utils.data.Dataset`` base class it inherits from.
    """

    def __init__(self, texts, max_len):
        self.texts = texts
        self.tokenizer = BertTokenizer.from_pretrained(params['pretrained_model_name'])
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, item):
        text = str(self.texts[item])
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',  # pad_to_max_length is deprecated
            return_attention_mask=True,
            truncation=True,
            return_tensors='pt',
        )
        return {
            'text': text,
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
        }
class SentimentClassifier(nn.Module):
    """BERT encoder followed by a dropout + linear classification head."""

    def __init__(self, n_classes):
        super().__init__()
        self.bert = BertModel.from_pretrained(params['pretrained_model_name'])
        self.drop = nn.Dropout(params['dropout'])
        self.out = nn.Linear(self.bert.config.hidden_size, n_classes)

    def forward(self, input_ids, attention_mask):
        # return_dict=False makes BertModel return a
        # (last_hidden_state, pooled_output) tuple.
        last_hidden_state, pooled_output = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            return_dict=False,
        )
        # Classify from the pooled [CLS] representation.
        return self.out(self.drop(pooled_output))
class PredictionModel:
    def __init__(self):
        # Move the model to the same device the input batches will be on;
        # without this, GPU inputs would hit a CPU-resident model.
        self.model = SentimentClassifier(n_classes=6).to(device)

    def create_data_loader(self, X_test, max_len, batch_size):
        ds = TextDataset(
            texts=np.array(X_test),
            max_len=max_len,
        )
        return DataLoader(ds, batch_size=batch_size)

    def predict(self, X_test: list):
        data_loader = self.create_data_loader(X_test, params['max_length'], params['batch_size'])
        # map_location keeps CPU-only machines from failing on a GPU-saved checkpoint.
        self.model.load_state_dict(torch.load(params['path_to_model_bin'], map_location=device))
        self.model.eval()
        y_pred = []
        with torch.no_grad():
            for d in data_loader:
                input_ids = d["input_ids"].to(device)
                attention_mask = d["attention_mask"].to(device)
                outputs = self.model(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                )
                # Predicted class = argmax over the six logits.
                _, preds = torch.max(outputs, dim=1)
                y_pred += preds.tolist()
        return y_pred
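
# Minimal usage sketch, assuming config.params supplies the keys used above
# and params['path_to_model_bin'] points to a fine-tuned checkpoint.
# The sample texts are illustrative, not from the original project.
if __name__ == "__main__":
    model = PredictionModel()
    sample_texts = [
        "I loved this product!",
        "Terrible experience, would not recommend.",
    ]
    predictions = model.predict(sample_texts)
    for text, label in zip(sample_texts, predictions):
        print(f"{label}: {text}")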