import torch.optim as optim
import numpy as np
import torch
import random
from torch.nn.utils import clip_grad_norm_  # clip_grad_norm is deprecated since PyTorch 0.4
import copy
import os
import pickle


def get_decoder_index_XY(batchY):
    '''
    :param batchY: list of binary label sequences, e.g. [0 0 1 0 0 0 0 1],
                   where 1 marks a segment boundary
    :return: per-sequence decoder start indices and gold boundary indices
    '''
    returnX = []
    returnY = []
    for i in range(len(batchY)):
        curY = batchY[i]
        # Positions of the gold boundaries in this sequence.
        decoderY = np.where(curY == 1)[0]
        if len(decoderY) == 1:
            decoderX = np.array([0])
        else:
            # Each segment starts at 0 or one past the previous boundary.
            decoderX = np.append([0], decoderY[:-1] + 1)
        returnX.append(decoderX)
        returnY.append(decoderY)
    # dtype=object keeps the ragged per-sequence arrays intact.
    returnX = np.array(returnX, dtype=object)
    returnY = np.array(returnY, dtype=object)
    return returnX, returnY


def align_variable_numpy(X, maxL, paddingNumber):
    # Right-pad every sequence in X to length maxL with paddingNumber.
    aligned = []
    for cur in X:
        ext_cur = []
        ext_cur.extend(cur)
        ext_cur.extend([paddingNumber] * (maxL - len(cur)))
        aligned.append(ext_cur)
    return np.array(aligned)


def sample_a_sorted_batch_from_numpy(numpyX, numpyY, batch_size, use_cuda):
    # batch_size is unused here; callers pass None to take every example.
    select_index = np.array(range(len(numpyX)))
    batch_x = [copy.deepcopy(numpyX[i]) for i in select_index]
    batch_y = [copy.deepcopy(numpyY[i]) for i in select_index]

    index_decoder_X, index_decoder_Y = get_decoder_index_XY(batch_y)

    all_lens = np.array([len(y) for y in batch_y])
    maxL = np.max(all_lens)

    # The batch keeps its original order (callers align per-example surface
    # forms in fukugen by position), so no length-based reordering is applied.

    numpy_batch_x = batch_x
    # 2000001 and 2 are the padding ids for tokens and labels respectively.
    batch_x = align_variable_numpy(batch_x, maxL, 2000001)
    batch_y = align_variable_numpy(batch_y, maxL, 2)
    batch_x = torch.from_numpy(np.array(batch_x, dtype="int64"))
    if use_cuda:
        # Move the padded token batch to the GPU when requested.
        batch_x = batch_x.cuda()

    return numpy_batch_x, batch_x, batch_y, index_decoder_X, index_decoder_Y, all_lens, maxL


class TrainSolver(object):

    def __init__(self, model, train_x, train_y, dev_x, dev_y, save_path, batch_size,
                 eval_size, epoch, lr, lr_decay_epoch, weight_decay, use_cuda):
        self.lr = lr
        self.model = model
        self.epoch = epoch
        self.train_x = train_x
        self.train_y = train_y
        self.use_cuda = use_cuda
        self.batch_size = batch_size
        self.lr_decay_epoch = lr_decay_epoch
        self.eval_size = eval_size
        self.dev_x, self.dev_y = dev_x, dev_y
        self.save_path = save_path
        self.weight_decay = weight_decay

    def get_batch_micro_metric(self, pre_b, ground_b, x, index2word, fukugen, nloop):
        # Map vocabulary indices back to surface tokens.
        tokendic = {n: w for n, w in enumerate(index2word)}
        sents = []
        for i, cur_seq_y in enumerate(ground_b):
            fuku = fukugen[i]
            index_of_1 = np.where(cur_seq_y == 1)[0]
            index_pre = np.array(pre_b[i])
            inp = x[i]

            # Drop the obligatory end-of-sequence boundary from both sides.
            END_B = index_of_1[-1]
            index_pre = list(index_pre[index_pre != END_B])
            index_of_1 = list(index_of_1[index_of_1 != END_B])

            # False positives: predicted boundaries absent from the gold labels.
            FP = [k for k in index_pre if k not in index_of_1]

            sent = [tokendic[int(j.item())] for j in inp]

            # Rebuild the surface text, cutting a new sentence at each false positive.
            ex = ""
            for n, (token, surface) in enumerate(zip(sent, fuku)):
                if token == "":
                    continue
                ex += surface
                if n in FP:
                    sents.append(ex)
                    ex = ""
            sents.append(ex)
        return sents

    def check_accuracy(self, data2X, data2Y, index2word, fukugen2):
        output_texts = []
        # Only a single dataset split is iterated here.
        for nloop in range(1):
            dataY = data2Y[nloop]
            dataX = data2X[nloop]
            fukugen = fukugen2[nloop]
            need_loop = int(np.ceil(len(dataY) / self.batch_size))
            for lp in range(need_loop):
                startN = lp * self.batch_size
                endN = min((lp + 1) * self.batch_size, len(dataY))
                fukuge = fukugen[startN:endN]
                numpy_batch_x, batch_x, batch_y, index_decoder_X, index_decoder_Y, all_lens, maxL = \
                    sample_a_sorted_batch_from_numpy(dataX[startN:endN], dataY[startN:endN],
                                                     None, self.use_cuda)
                batch_ave_loss, batch_boundary, batch_boundary_start, batch_align_matrix = \
                    self.model.predict(batch_x, index_decoder_Y, all_lens)
                # Accumulate the reconstructed texts from every batch.
                output_texts.extend(self.get_batch_micro_metric(
                    batch_boundary, batch_y, batch_x, index2word, fukuge, nloop))
        return output_texts
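

# Minimal smoke test for the batching helpers above. The toy label sequences
# and token ids below are illustrative assumptions, not project data; the
# model-dependent path (TrainSolver.check_accuracy) is not exercised because
# it requires a trained model exposing a predict() method.
if __name__ == "__main__":
    # 1 marks a segment boundary; every sequence ends with a boundary.
    toy_y = [np.array([0, 0, 1, 0, 1]), np.array([0, 0, 1])]
    toy_x = [np.array([11, 12, 13, 14, 15]), np.array([21, 22, 23])]

    dec_x, dec_y = get_decoder_index_XY(toy_y)
    print(dec_x)  # per-sequence decoder start offsets, e.g. [0 3] and [0]
    print(dec_y)  # per-sequence gold boundary positions, e.g. [2 4] and [2]

    (numpy_batch_x, batch_x, batch_y,
     idx_dec_x, idx_dec_y, all_lens, maxL) = sample_a_sorted_batch_from_numpy(
        toy_x, toy_y, None, use_cuda=False)
    print(batch_x.shape)   # torch.Size([2, 5]): both rows padded to maxL
    print(all_lens, maxL)  # [5 3] 5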