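"""Trainer for Yelp review sentiment classification.

Wraps four pretrained classifiers (LSTM, BiLSTM, maximum entropy, and SVM),
loads their checkpoints from disk, and exposes predict helpers that map a
piece of review text to a "Positive" / "Negative" label.
"""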
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import os
import re

from tqdm import tqdm
import nltk

from lstm_model_new import LSTM_model, BiLSTMModel
from max_ent_model import MaxEntropyModel
from svm_model import SVM

# nltk.word_tokenize needs the punkt tokenizer data.
nltk.download('punkt')
class Trainer(object):
    def __init__(self, vocab_size, sequence_len, batch_size, nn_epochs, model_type):
        self.vocab_size = vocab_size
        # Id 0 is reserved for padding / out-of-vocabulary tokens, so the
        # embedding table needs one extra slot.
        self.vocab_sizeb = self.vocab_size + 1
        self.sequence_len = sequence_len
        self.model_type = model_type
        self.batch_size = batch_size
        self.nn_epochs = nn_epochs
        self.processed_data_folder = "../preprocessed_data/"
        self._load_data()
        self._get_model()
    def _load_data(self):
        # The saved dictionary is an array of words; position i holds the word
        # with id i. Build the inverse word -> id mapping used at inference time.
        dict_fn = "yelp_dictionary.npy"
        id_to_word = np.load(dict_fn, allow_pickle=True)
        word_to_id = {
            id_to_word[idx]: idx for idx in range(len(id_to_word))
        }
        self.word_to_id = word_to_id
        # The training / test arrays are only needed when (re)training; loading
        # them is disabled here so the trainer can be used for inference only.
        # x_train = np.load(os.path.join(self.processed_data_folder, "x_train.npy"))
        # y_train = np.load(os.path.join(self.processed_data_folder, "y_train.npy"))
        # x_test = np.load(os.path.join(self.processed_data_folder, "x_test.npy"))
        # y_test = np.load(os.path.join(self.processed_data_folder, "y_test.npy"))
        # self.x_train, self.y_train = x_train, y_train
        # self.x_test, self.y_test = x_test, y_test
    def _get_model(self):
        # Build the requested model and load its pretrained parameters.
        if self.model_type == "lstm":
            self.model = LSTM_model(self.vocab_sizeb, 800)
        elif self.model_type == "bilstm":
            self.model = BiLSTMModel(self.vocab_sizeb, 800)
        elif self.model_type == "max_ent":
            self.model = MaxEntropyModel()
        elif self.model_type == "svm":
            self.model = SVM()
        else:
            raise ValueError("Model type not supported")
        if self.model_type in ['lstm', 'bilstm']:
            # Torch checkpoints are loaded onto the CPU; move the model to the
            # GPU manually (e.g. self.model.cuda()) before training.
            model_ckpt_fn = f"{self.model_type}.pth"
            self.model.load_state_dict(torch.load(model_ckpt_fn, map_location=torch.device('cpu')))
        elif self.model_type in ['max_ent']:
            # The maximum-entropy checkpoint stores its feature table and
            # weights in a pickled dict.
            model_ckpt_fn = f"{self.model_type}_ckpt.npy"
            model_params = np.load(model_ckpt_fn, allow_pickle=True).item()
            self.model.features = model_params["features"]
            self.model.weights = model_params["weights"]
            self.model.last_weights = model_params["weights"]
        elif self.model_type in ['svm']:
            # The SVM checkpoint stores the weight vector and bias of the
            # trained classifier.
            model_ckpt_fn = f"{self.model_type}_weights.npy"
            model_params = np.load(model_ckpt_fn, allow_pickle=True).item()
            self.model.svm_model.w = model_params['w']
            self.model.svm_model.b = model_params['b']
        else:
            raise ValueError("Model type not supported")
    def _setup_optimizer(self):
        # Adam over all model parameters; only used by the torch-based models.
        self.lr = 0.001
        self.opt = optim.Adam(self.model.parameters(), lr=self.lr)
    def _train(self):
        # Create the optimizer here (rather than in __init__) so that
        # inference-only usage of the Trainer never needs it.
        self._setup_optimizer()
        train_losses = []
        train_accs = []
        for epoch in range(self.nn_epochs):
            print(f"Epoch: {epoch}")
            self.model.train()
            nn_acc = 0
            nn_total = 0
            epoch_loss = 0.0
            num_batches = 0
            # Shuffle the training set once per epoch.
            train_permutation_idxes = np.random.permutation(self.y_train.shape[0])
            for i in tqdm(range(0, len(self.y_train), self.batch_size)):
                batched_x = self.x_train[train_permutation_idxes[i: i + self.batch_size]]
                batched_y = self.y_train[train_permutation_idxes[i: i + self.batch_size]]
                data = torch.from_numpy(batched_x).long().cuda()
                target = torch.from_numpy(batched_y).float().cuda()
                self.opt.zero_grad()
                loss, predicted_labels = self.model(data, target)
                loss.backward()
                nn.utils.clip_grad_norm_(self.model.parameters(), 2.0)
                self.opt.step()
                # Scores >= 0 are treated as the positive class.
                predicted_labels = predicted_labels >= 0
                gts = target >= 0.5
                nn_acc += torch.sum((predicted_labels == gts).float()).item()
                epoch_loss += loss.item()
                nn_total += len(batched_y)
                num_batches += 1
            train_acc = float(nn_acc) / float(nn_total)
            # Average loss per batch.
            train_loss = epoch_loss / float(max(num_batches, 1))
            train_losses.append(train_loss)
            train_accs.append(train_acc)
            print(f"[Epoch {epoch}] Train Loss: {train_loss}, Train Acc: {train_acc}")
            self._test()
    def _process_text(self, input_text):
        # Keep only letters, spaces and apostrophes, then tokenize.
        text = re.sub('[^a-zA-Z \']', '', re.sub('\\\\n', ' ', ','.join(input_text))).lower()
        tokens = nltk.word_tokenize(text)
        # Map tokens to ids; unknown tokens become 0 (the padding / OOV id).
        token_ids = [self.word_to_id.get(token, -1) + 1 for token in tokens]
        token_ids = np.array(token_ids)
        token_ids[token_ids > self.vocab_size] = 0
        if token_ids.shape[0] > self.sequence_len:
            # Crop a random window of sequence_len tokens.
            start_index = np.random.randint(token_ids.shape[0] - self.sequence_len + 1)
            token_ids = token_ids[start_index: (start_index + self.sequence_len)]
        else:
            # Pad with zeros up to sequence_len.
            token_ids = np.concatenate([token_ids, np.zeros(self.sequence_len - token_ids.shape[0])])
        return token_ids
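    # Illustrative example (hypothetical values): with sequence_len = 5, the
    # token ids [3, 17, 9] come back padded as [3, 17, 9, 0, 0], while a review
    # longer than 5 tokens is cropped to a random 5-token window.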
    def _process_text_maxent(self, input_text):
        # Same cleaning and tokenization as _process_text, but the MaxEnt model
        # expects its features as strings of word ids rather than an array.
        text = re.sub('[^a-zA-Z \']', '', re.sub('\\\\n', ' ', ','.join(input_text))).lower()
        tokens = nltk.word_tokenize(text)
        token_ids = [self.word_to_id.get(token, -1) + 1 for token in tokens]
        token_ids = [str(word_idx) for word_idx in token_ids]
        return token_ids
    def _process_text_svm(self, input_text):
        # The SVM pipeline uses its own fitted vectorizer instead of the
        # word-id dictionary.
        text = re.sub('[^a-zA-Z \']', '', re.sub('\\\\n', ' ', ','.join(input_text))).lower()
        tokens = self.model.vectorizer.transform([text]).toarray()
        return tokens
    def predict_maxent(self, input_text):
        text_ids = self._process_text_maxent(input_text)
        # calculate_probability returns (probability, label) pairs; sort so the
        # most likely label comes first.
        prob = self.model.calculate_probability(text_ids)
        prob.sort(reverse=True)
        print(prob)
        max_prob_idx = int(prob[0][1])
        # Label 2 corresponds to the positive class.
        if max_prob_idx == 2:
            return "Positive"
        else:
            return "Negative"
    def predict_svm(self, input_text):
        text_ids = self._process_text_svm(input_text)
        predicted_label = self.model.svm_model.predict(text_ids)
        # The SVM outputs a signed score; positive scores map to the positive class.
        if float(predicted_label[0]) > 0:
            return "Positive"
        else:
            return "Negative"
    def predict(self, input_text):
        # LSTM / BiLSTM inference: tokenize, wrap in a batch of one, and read
        # off the sign of the predicted score.
        text_ids = self._process_text(input_text)
        data = torch.from_numpy(text_ids).long()
        data = data.unsqueeze(0)
        # The model's forward pass also computes a loss, so pass a dummy target.
        target = torch.zeros((data.size(0), ), dtype=torch.float)
        with torch.no_grad():
            loss, predicted_labels = self.model(data, target)
            predicted_labels = predicted_labels >= 0
        if predicted_labels.item():
            return "Positive"
        else:
            return "Negative"
    def _test(self):
        self.model.eval()
        nn_acc = 0
        nn_total = 0
        test_permutation_idxes = np.random.permutation(self.y_test.shape[0])
        for i in tqdm(range(0, len(self.y_test), self.batch_size)):
            batched_x = self.x_test[test_permutation_idxes[i: i + self.batch_size]]
            batched_y = self.y_test[test_permutation_idxes[i: i + self.batch_size]]
            data = torch.from_numpy(batched_x).long().cuda()
            target = torch.from_numpy(batched_y).float().cuda()
            with torch.no_grad():
                loss, predicted_labels = self.model(data, target)
            predicted_labels = predicted_labels >= 0
            gts = target >= 0.5
            nn_acc += torch.sum((predicted_labels == gts).float()).item()
            nn_total += len(batched_y)
        acc = float(nn_acc) / float(nn_total)
        print(f"Test Acc: {acc}")
if __name__ == '__main__':
    vocab_size = 8000
    sequence_len = 150
    batch_size = 256
    nn_epochs = 20
    model_type = "bilstm"  # or "lstm", "max_ent", "svm"
    trainer = Trainer(vocab_size, sequence_len, batch_size, nn_epochs, model_type)
    # Training requires the preprocessed x_train / y_train / x_test / y_test
    # arrays; re-enable their loading in _load_data before running this.
    trainer._train()

# CUDA_VISIBLE_DEVICES=0 python trainer.py
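# Minimal inference sketch (assumes the pretrained checkpoints above are on
# disk; the review text is made up for illustration):
#   trainer = Trainer(8000, 150, 256, 20, "max_ent")
#   print(trainer.predict_maxent("The food was great and the staff were friendly"))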