## Import required libraries
from datetime import datetime
import numpy as np
import pandas as pd
import random
from transformers import BertTokenizer, BertModel
import logging
import matplotlib.pyplot as plt
tokenizer = BertTokenizer.from_pretrained('bert-large-uncased')  # needed by load_dictionary below
import itertools
from sklearn.preprocessing import StandardScaler
from itertools import cycle,islice
from random import sample
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
import torch.nn.functional as F
if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f'There are {torch.cuda.device_count()} GPU(s) available.')
    print('Device name:', torch.cuda.get_device_name(0))
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")
# Load and normalize the reference EPA dictionaries
def load_dictionary(file):
    df = pd.read_csv(file).reset_index().rename(columns={"index": 'index_in_dic'})
    df['term2'] = df['term']
    df.term = df.term.str.replace("_", " ")
    df['len_Bert'] = df.apply(lambda x: len(tokenizer.tokenize(x['term'])), axis=1)
    # df = add_cluster(df)
    return df
Modifiers = load_dictionary("FullSurveyorInteract_Modifiers.csv")
Behaviors = load_dictionary("FullSurveyorInteract_Behaviors.csv")
Identities = load_dictionary("FullSurveyorInteract_Identities.csv")
# Standardize the E/P/A ratings of each dictionary with its own scaler
n_Modifiers = Modifiers.copy()
n_Behaviors = Behaviors.copy()
n_Identities = Identities.copy()
scaler_B = StandardScaler()
scaler_M = StandardScaler()
scaler_I = StandardScaler()
n_Behaviors[['E', 'P', 'A']] = scaler_B.fit_transform(Behaviors[['E', 'P', 'A']])
n_Modifiers[['E', 'P', 'A']] = scaler_M.fit_transform(Modifiers[['E', 'P', 'A']])
n_Identities[['E', 'P', 'A']] = scaler_I.fit_transform(Identities[['E', 'P', 'A']])
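# Note: the three CSVs are assumed to contain at least a 'term' column and numeric
# 'E', 'P', 'A' ratings (plus 'E2', 'P2', 'A2', judging by the placeholder row built
# in get_output_new below). The fitted scalers are reused later to map model
# predictions back to the original EPA scale, e.g. (a sketch):
#   scaler_I.inverse_transform(n_Identities[['E', 'P', 'A']].head(1))
# recovers the first identity's original E/P/A values.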
# Ref: https://mccormickml.com/2019/05/14/BERT-word-embeddings-tutorial/
rnd_st=42
# Reload the BERT tokenizer with explicit lower-casing
tokenizer = BertTokenizer.from_pretrained('bert-large-uncased', do_lower_case=True)
# Create a function to tokenize a set of texts
def preprocessing_for_bert(data, MAX_LEN=40):
    """Perform the required preprocessing steps for pretrained BERT.
    @param data (list of str): Texts to be processed (this file always passes a single-element list).
    @return input_ids (torch.Tensor): Tensor of token ids to be fed to the model.
    @return attention_masks (torch.Tensor): Tensor of indices specifying which
                  tokens should be attended to by the model.
    """
    # Create empty lists to store outputs
    input_ids = []
    attention_masks = []

    # For every sentence...
    for sent in data:
        # `encode_plus` will:
        #   (1) Tokenize the sentence
        #   (2) Add the `[CLS]` and `[SEP]` tokens to the start and end
        #   (3) Truncate/pad the sentence to the max length
        #   (4) Map tokens to their IDs
        #   (5) Create the attention mask
        #   (6) Return a dictionary of outputs
        encoded_sent = tokenizer.encode_plus(
            text=sent,                    # Preprocess sentence
            add_special_tokens=True,      # Add `[CLS]` and `[SEP]`
            max_length=MAX_LEN,           # Max length to truncate/pad
            padding='max_length',         # Pad sentence to max length
            # return_tensors='pt',        # Return PyTorch tensor
            return_attention_mask=True    # Return attention mask
        )

        # Add the outputs to the lists
        input_ids.append(encoded_sent.get('input_ids'))
        attention_masks.append(encoded_sent.get('attention_mask'))

    # Convert the (single) encoded sentence to tensors
    input_ids = torch.tensor(input_ids[0])
    attention_masks = torch.tensor(attention_masks[0])
    return input_ids, attention_masks
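# Example (hypothetical sentence; a sketch of how the encoder is used below):
#   ids, mask = preprocessing_for_bert(["kind mother comforts frightened child"])
#   ids.shape, mask.shape  -> torch.Size([40]), torch.Size([40])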
# # Convert other data types to torch.Tensor
# from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
def gnrtr2(Identity, Behavior, Modifier):
    # Sample one actor, one object, one behavior and two modifiers at random
    ident1 = Identity.sample(axis=0)
    ident2 = Identity.sample(axis=0)
    behav = Behavior.sample(axis=0)
    modif1 = Modifier.sample(axis=0)
    modif2 = Modifier.sample(axis=0)
    id1 = list(ident1.term)
    id2 = list(ident2.term)
    beh = list(behav.term)
    mod1 = list(modif1.term)
    mod2 = list(modif2.term)
    # Build the event sentence: modifier actor behavior modifier object
    sents = ' '.join(map(str, (mod1 + id1 + beh + mod2 + id2)))
    values = np.concatenate([(modif1[['E', 'P', 'A']]).to_numpy(),
                             (ident1[['E', 'P', 'A']]).to_numpy(),
                             (behav[['E', 'P', 'A']]).to_numpy(),
                             (modif2[['E', 'P', 'A']]).to_numpy(),
                             (ident2[['E', 'P', 'A']]).to_numpy()], axis=1)[0]
    indexx = torch.tensor([modif1['index_in_dic'].to_numpy()[0],
                           ident1['index_in_dic'].to_numpy()[0],
                           behav['index_in_dic'].to_numpy()[0],
                           modif2['index_in_dic'].to_numpy()[0],
                           ident2['index_in_dic'].to_numpy()[0]])
    ys = torch.tensor(values)
    inputs, masks = preprocessing_for_bert([sents])
    # data = TensorDataset(inputs, masks, ys)
    yield inputs, masks, ys, indexx
# For fine-tuning BERT, the authors recommend a batch size of 16 or 32.
def dta_ldr2(I, B, M, batch_size=32):
    dt_ldr = [x for x in DataLoader([next(gnrtr2(I, B, M)) for x in range(batch_size)], batch_size=batch_size)][0]
    return dt_ldr
# # Convert other data types to torch.Tensor
# For fine-tuning BERT, the authors recommend a batch size of 16 or 32.
def dta_ldr(I, B, M, batch_size=32):
    # Note: a `gnrtr` generator is not defined in this file; gnrtr2 is the sampling generator used here
    dt_ldr = [x for x in DataLoader([next(gnrtr2(I, B, M)) for x in range(batch_size)], batch_size=batch_size)][0]
    return dt_ldr
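# Example (a sketch): one training batch of 32 randomly composed events
#   ids, masks, ys, idx = dta_ldr(n_Identities, n_Behaviors, n_Modifiers, batch_size=32)
#   ids.shape -> torch.Size([32, 40])   (token ids)
#   ys.shape  -> torch.Size([32, 15])   (standardized E/P/A targets for the 5 terms)
#   idx.shape -> torch.Size([32, 5])    (dictionary row indices)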
class BertRegressor(nn.Module):
    """BERT model for regression tasks."""

    def __init__(self, freeze_bert=False):
        """
        @param freeze_bert (bool): Set to `True` to freeze BERT and train only the regression head
        """
        super(BertRegressor, self).__init__()
        # Hidden size of BERT, hidden size of the regression head, and number of outputs
        D_in, H, D_out = 1024, 120, 15
        # Instantiate the BERT model
        self.bert = BertModel.from_pretrained('bert-large-uncased')
        # Instantiate the feed-forward regression head
        self.regressor = nn.Sequential(
            nn.Dropout(0.4),
            nn.Linear(D_in, H),
            nn.Dropout(0.3),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(H, D_out)
        )
        # for name, param in list(self.bert.named_parameters())[:-90]:  # -20 / -90 / -196 / -4 (very slow in training)
        #     print('I will be frozen: {}'.format(name))
        #     param.requires_grad = False
        # Freeze the BERT model
        if freeze_bert:
            for param in self.bert.parameters():
                param.requires_grad = False

    def forward(self, input_ids, attention_mask):
        """
        Feed the input to BERT and the regression head to compute predictions.
        @param input_ids (torch.Tensor): input tensor with shape (batch_size, max_length)
        @param attention_mask (torch.Tensor): attention mask with shape (batch_size, max_length)
        @return predictions (torch.Tensor): output tensor with shape (batch_size, D_out)
        """
        # Feed input to BERT
        outputs = self.bert(input_ids=input_ids,
                            attention_mask=attention_mask)
        # Extract the last hidden state of the `[CLS]` token for the regression task
        last_hidden_state_cls = outputs[0][:, 0, :]
        # Feed the `[CLS]` representation to the regression head
        predictions = self.regressor(last_hidden_state_cls)
        return predictions
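# A sketch of the regression head's output shape (hypothetical tensors):
#   model = BertRegressor()
#   preds = model(ids.unsqueeze(0), mask.unsqueeze(0))   # ids/mask from preprocessing_for_bert
#   preds.shape -> torch.Size([1, 15])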
from transformers import AdamW, get_linear_schedule_with_warmup
def initialize_model(epochs=4):
    """Initialize the BERT regressor, the optimizer and the learning rate scheduler."""
    # Instantiate the BERT regressor
    bert_regressor = BertRegressor(freeze_bert=False)
    # Tell PyTorch to run the model on the GPU (or the CPU fallback)
    bert_regressor.to(device)
    # Create the optimizer
    optimizer = AdamW(bert_regressor.parameters(),
                      lr=2e-5,             # Small learning rate
                      eps=1e-8,            # Default epsilon value
                      weight_decay=0.001   # Decoupled weight decay to apply
                      )
    # Total number of training steps
    total_steps = 100000  # len(train_dataloader) * epochs
    # Set up the learning rate scheduler
    scheduler = get_linear_schedule_with_warmup(optimizer,
                                                num_warmup_steps=0,   # Default value
                                                num_training_steps=total_steps)
    return bert_regressor, optimizer, scheduler
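# Typical usage (a sketch; `train` below reads `optimizer` and `scheduler` as module-level globals):
#   bert_regressor, optimizer, scheduler = initialize_model()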
import random
import time

# Specify the loss function
loss_fn = nn.MSELoss()

def set_seed(seed_value=42):
    """Set seeds for reproducibility."""
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)
def train(model, I_trn, B_trn, M_trn, I_tst, B_tst, M_tst,
          batch_size_tst=32, batch_size=50, batch_epochs=400, evaluation=False, batch_size_trn=32):
    """Train the BertRegressor model.

    Relies on the module-level `optimizer` and `scheduler` returned by initialize_model().
    """
    # Start training loop
    print("Start training...\n")

    # =======================================
    #               Training
    # =======================================
    # Print the header of the result table
    print(f" {'Batch':^5} | {'Train Loss':^12} | {'Val Loss':^10} | {'Elapsed':^9}")
    print("-" * 50)

    # Measure the elapsed time of each reporting window
    t0_batch = time.time()
    time_elapsed = 0

    # Reset tracking variables
    batch_loss, batch_counts = 0, 0

    # Put the model into training mode
    model.train()

    # For each batch of freshly sampled training data...
    for batch in range(batch_epochs):
        batch_counts += 1
        if batch == 456:  # hard stop used during experimentation
            break
        # Load the batch to the GPU
        b_input_ids, b_attn_mask, b_ys, _ = tuple(
            t.to(device) for t in dta_ldr(I=I_trn, B=B_trn, M=M_trn, batch_size=batch_size_trn))
        # Zero out any previously calculated gradients
        model.zero_grad()
        # Perform a forward pass
        preds = model(b_input_ids, b_attn_mask)
        # Compute the loss
        loss = loss_fn(preds.float(), b_ys.float())
        batch_loss += loss.item()
        # Perform a backward pass to calculate gradients
        loss.backward()
        # Clip the norm of the gradients to 1.0 to prevent "exploding gradients"
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        # Update parameters and the learning rate
        optimizer.step()
        scheduler.step()

        # Print the loss values and time elapsed every 50 batches
        if batch_counts % 50 == 0 and batch_counts != 0:
            # Calculate the time elapsed for the last 50 batches
            time_elapsed = time.time() - t0_batch
            # Measure the model's performance on the validation set
            val_loss = evaluate(model, Ie=I_tst, Be=B_tst, Me=M_tst, batch_size_e=batch_size_tst)
            print(f"{batch + 1:^7}|{batch_loss / batch_counts:^12.6f} | {val_loss:^10.6f} | {time_elapsed:^9.2f}")
            print("-" * 50)
            # Reset batch tracking variables
            batch_loss, batch_counts = 0, 0
            t0_batch = time.time()

    # =======================================
    #              Evaluation
    # =======================================
    if evaluation:
        # After training, measure the model's performance on the validation set
        val_loss = evaluate(model, Ie=I_tst, Be=B_tst, Me=M_tst, batch_size_e=batch_size_tst)
        if val_loss < 0.32:
            print('\n Consider this one with val:', val_loss, ' at:', batch, '\n')
        print("-" * 50)

    # Final report (guard against a zero batch count right after a reset)
    val_loss = evaluate(model, Ie=I_tst, Be=B_tst, Me=M_tst, batch_size_e=batch_size_tst)
    print(f"{batch + 1:^7}|{batch_loss / max(batch_counts, 1):^12.6f} | {val_loss:^10.6f} | {time_elapsed:^9.2f}")
    print("Training complete!")
def evaluate(model, Ie, Be, Me, batch_size_e):
    """Measure the model's performance on a freshly sampled validation batch."""
    # Put the model into evaluation mode; dropout layers are disabled at test time
    model.eval()

    # Tracking variables
    val_loss = []

    # For each batch in the validation set (a single sampled batch here)...
    for batch in range(1):
        # Load the batch to the GPU
        b_input_ids, b_attn_mask, b_ys, _ = tuple(t.to(device) for t in dta_ldr2(Ie, Be, Me, batch_size_e))
        # Compute predictions
        with torch.no_grad():
            preds = model(b_input_ids, b_attn_mask)
        # Compute the loss (cast both sides to float32, as in the training loop)
        loss = loss_fn(preds.float(), b_ys.float())
        val_loss.append(loss.item())

    # Compute the mean loss over the validation batch
    val_loss = np.mean(val_loss)
    return val_loss
# Load the trained weights and put the model in inference mode
bert_regressor = BertRegressor()
bert_regressor.load_state_dict(torch.load("MABMO_product", map_location=device))
bert_regressor.eval()
def bert_predict(model, test_dataloader):
    """Perform a forward pass on the trained BERT regressor to predict E/P/A values
    for the test batch.
    """
    # Put the model into evaluation mode; dropout layers are disabled at test time
    model.eval()

    all_preds = []

    # For each batch in the test set (a single pre-collated batch here)...
    for batch in range(1):
        # Load the batch to the GPU
        b_input_ids, b_attn_mask = tuple(t.to(device) for t in test_dataloader)[:2]
        # Compute predictions
        with torch.no_grad():
            preds = model(b_input_ids, b_attn_mask)
        all_preds.append(preds)

    # Concatenate predictions from each batch
    all_preds = torch.cat(all_preds, dim=0)
    return all_preds
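# Example (a sketch):
#   batch = dta_ldr2(n_Identities, n_Behaviors, n_Modifiers, batch_size=8)
#   preds = bert_predict(bert_regressor.to(device), batch)   # shape (8, 15)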
def out_df(data, predictions, df_beh=Behaviors, df_ident=Identities, df_mod=Modifiers):
    # Map standardized predictions and targets back to the original EPA scale and place
    # them side by side. The column names appear to follow the pattern 'EE../EP../EA..'
    # for estimated E/P/A and plain 'E../P../A..' for dictionary values, with suffixes
    # MA (actor modifier), A (actor), B (behavior), MO (object modifier), O (object).
    df2 = pd.concat([pd.DataFrame(scaler_M.inverse_transform(predictions[:, 0:3].cpu())),
                     pd.DataFrame(scaler_M.inverse_transform(data[2][:, 0:3])),
                     pd.DataFrame(scaler_I.inverse_transform(predictions[:, 3:6].cpu())),
                     pd.DataFrame(scaler_I.inverse_transform(data[2][:, 3:6])),
                     pd.DataFrame(scaler_B.inverse_transform(predictions[:, 6:9].cpu())),
                     pd.DataFrame(scaler_B.inverse_transform(data[2][:, 6:9])),
                     pd.DataFrame(scaler_M.inverse_transform(predictions[:, 9:12].cpu())),
                     pd.DataFrame(scaler_M.inverse_transform(data[2][:, 9:12])),
                     pd.DataFrame(scaler_I.inverse_transform(predictions[:, 12:15].cpu())),
                     pd.DataFrame(scaler_I.inverse_transform(data[2][:, 12:15])),
                     pd.DataFrame(np.array(data[3]))
                     ], axis=1).set_axis(['EEMA', 'EPMA', 'EAMA', 'EM1', 'PM1', 'AM1',
                                          'EEA', 'EPA', 'EAA', 'EA', 'PA', 'AA',
                                          'EEB', 'EPB', 'EAB', 'EB', 'PB', 'AB',
                                          'EEMO', 'EPMO', 'EAMO', 'EM2', 'PM2', 'AM2',
                                          'EEO', 'EPO', 'EAO', 'EO', 'PO', 'AO',
                                          'idx_ModA', 'idx_Act', 'idx_Beh', 'idx_ModO', 'idx_Obj'],
                                         axis=1, inplace=False)
    # Attach the original terms by joining on the dictionary row indices
    df2 = pd.merge(df2, df_mod[['term', 'index_in_dic']], left_on=['idx_ModA'], right_on=["index_in_dic"],
                   how='left').rename(columns={"term": 'ModA'}).drop(['index_in_dic'], axis=1)
    df2 = pd.merge(df2, df_ident[['term', 'index_in_dic']], left_on=['idx_Act'], right_on=["index_in_dic"],
                   how='left').rename(columns={"term": 'Actor'}).drop(['index_in_dic'], axis=1)
    df2 = pd.merge(df2, df_beh[['term', 'index_in_dic']], left_on=['idx_Beh'], right_on=["index_in_dic"],
                   how='left').rename(columns={"term": 'Behavior'}).drop(['index_in_dic'], axis=1)
    df2 = pd.merge(df2, df_mod[['term', 'index_in_dic']], left_on=['idx_ModO'], right_on=["index_in_dic"],
                   how='left').rename(columns={"term": 'ModO'}).drop(['index_in_dic'], axis=1)
    df2 = pd.merge(df2, df_ident[['term', 'index_in_dic']], left_on=['idx_Obj'], right_on=["index_in_dic"],
                   how='left').rename(columns={"term": 'Object'}).drop(['index_in_dic'], axis=1)
    # Reorder columns: estimates first, then dictionary values, then the terms
    df2 = df2[['EEMA', 'EPMA', 'EAMA', 'EEA', 'EPA', 'EAA', 'EEB', 'EPB', 'EAB',
               'EEMO', 'EPMO', 'EAMO', 'EEO', 'EPO', 'EAO',
               'EM1', 'PM1', 'AM1', 'EA', 'PA', 'AA', 'EB', 'PB', 'AB',
               'EM2', 'PM2', 'AM2', 'EO', 'PO', 'AO',
               'ModA', 'Actor', 'Behavior', 'ModO', 'Object']]
    return df2
def get_output(I_b=n_Identities, B_b=n_Behaviors, M_b=n_Modifiers, batch_sz=3000, batch_num=10):
    df = pd.DataFrame()
    for i in range(batch_num):
        q = dta_ldr2(I=I_b, B=B_b, M=M_b, batch_size=batch_sz)
        preds = bert_predict(bert_regressor.to(device), q)
        df2 = out_df(data=q, predictions=preds)
        df = pd.concat([df, df2], axis=0)
    return df
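# Example (a sketch; the defaults generate 10 batches of 3000 random events):
#   results = get_output(batch_sz=100, batch_num=2)   # 200 scored events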
def gen_new(Identity, Behavior, Modifier, n_df, word_type):
    # Like gnrtr2, but one slot (identity, behavior or modifier) is drawn from n_df and the
    # remaining slots use fixed random_state seeds. Note this definition is overridden by
    # the unseeded version further below.
    if word_type == 'identity':
        ident1 = n_df.sample(axis=0, random_state=56)
    else:
        ident1 = Identity.sample(axis=0, random_state=6)
    ident2 = Identity.sample(axis=0, random_state=6)
    if word_type == 'behavior':
        behav = n_df.sample(axis=0, random_state=5)
    else:
        behav = Behavior.sample(axis=0, random_state=5)
    if word_type == 'modifier':
        modif1 = n_df.sample(axis=0, random_state=55)
    else:
        modif1 = Modifier.sample(axis=0)
    modif2 = Modifier.sample(axis=0, random_state=96)
    id1 = list(ident1.term)
    id2 = list(ident2.term)
    beh = list(behav.term)
    mod1 = list(modif1.term)
    mod2 = list(modif2.term)
    # wrdvc_ident1 = gs_model.get_vector((list(ident1.trm_org))[0], norm=True)
    sents = ' '.join(map(str, (mod1 + id1 + beh + mod2 + id2)))
    values = np.concatenate([(modif1[['E', 'P', 'A']]).to_numpy(),
                             (ident1[['E', 'P', 'A']]).to_numpy(),
                             (behav[['E', 'P', 'A']]).to_numpy(),
                             (modif2[['E', 'P', 'A']]).to_numpy(),
                             (ident2[['E', 'P', 'A']]).to_numpy()], axis=1)[0]
    indexx = torch.tensor([modif1['index_in_dic'].to_numpy()[0],
                           ident1['index_in_dic'].to_numpy()[0],
                           behav['index_in_dic'].to_numpy()[0],
                           modif2['index_in_dic'].to_numpy()[0],
                           ident2['index_in_dic'].to_numpy()[0]])
    ys = torch.tensor(values)
    inputs, masks = preprocessing_for_bert([sents])
    yield inputs, masks, ys, indexx
def ldr_new(I, B, M, N_df, WT, batch_size=32):
    dt_ldr = [x for x in DataLoader([next(gen_new(I, B, M, N_df, WT)) for x in range(batch_size)], batch_size=batch_size)][0]
    return dt_ldr
cols=['EEMA', 'EPMA', 'EAMA', 'EEA', 'EPA', 'EAA', 'EEB', 'EPB', 'EAB',
'EEMO', 'EPMO', 'EAMO', 'EEO', 'EPO', 'EAO', 'ModA', 'Actor', 'Behavior', 'ModO', 'Object']
def get_output_new(w, wt, I_b=n_Identities, B_b=n_Behaviors, M_b=n_Modifiers, batch_sz=300, batch_num=1, columnss=cols, cus_col=1):
    df = pd.DataFrame()
    for i in range(batch_num):
        # Wrap the new word `w` in a one-row dictionary entry with placeholder ratings
        new_df = pd.DataFrame({'index_in_dic': 1000, 'term': w, 'E': 10, 'P': 10, 'A': 10,
                               'E2': 10, 'P2': 10, 'A2': 10, 'term2': w, 'len_Bert': 3}, index=[0])
        q = ldr_new(I=I_b, B=B_b, M=M_b, N_df=new_df, WT=wt, batch_size=batch_sz)
        preds = bert_predict(bert_regressor.to(device), q)
        if wt == 'identity':
            df_identity = pd.concat([Identities, new_df], axis=0)
            df2 = out_df(data=q, predictions=preds, df_ident=df_identity)
            if cus_col:
                columnss = ['EEA', 'EPA', 'EAA', 'ModA', 'Actor', 'Behavior', 'ModO', 'Object']
        if wt == 'behavior':
            df_behavior = pd.concat([Behaviors, new_df], axis=0)
            df2 = out_df(data=q, predictions=preds, df_beh=df_behavior)
            if cus_col:
                columnss = ['EEB', 'EPB', 'EAB', 'ModA', 'Actor', 'Behavior', 'ModO', 'Object']
        if wt == 'modifier':
            df_modifier = pd.concat([Modifiers, new_df], axis=0)
            df2 = out_df(data=q, predictions=preds, df_mod=df_modifier)
            if cus_col:
                columnss = ['EEMA', 'EPMA', 'EAMA', 'ModA', 'Actor', 'Behavior', 'ModO', 'Object']
        df = pd.concat([df, df2], axis=0)
    return df[columnss]
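# Example (a sketch; 'influencer' is a hypothetical out-of-dictionary identity):
#   new_scores = get_output_new(w='influencer', wt='identity', batch_sz=50)
#   # EEA/EPA/EAA give the estimated E/P/A of the new identity in each sampled event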
def gen_new(Identity, Behavior, Modifier, n_df, word_type):
    # Redefinition of gen_new that draws every slot without fixed random_state seeds;
    # being defined later, this is the version ldr_new resolves at call time.
    if word_type == 'identity':
        ident1 = n_df.sample(axis=0)
    else:
        ident1 = Identity.sample(axis=0)
    ident2 = Identity.sample(axis=0)
    if word_type == 'behavior':
        behav = n_df.sample(axis=0)
    else:
        behav = Behavior.sample(axis=0)
    if word_type == 'modifier':
        modif1 = n_df.sample(axis=0)
    else:
        modif1 = Modifier.sample(axis=0)
    modif2 = Modifier.sample(axis=0)
    id1 = list(ident1.term)
    id2 = list(ident2.term)
    beh = list(behav.term)
    mod1 = list(modif1.term)
    mod2 = list(modif2.term)
    sents = ' '.join(map(str, (mod1 + id1 + beh + mod2 + id2)))
    values = np.concatenate([(modif1[['E', 'P', 'A']]).to_numpy(),
                             (ident1[['E', 'P', 'A']]).to_numpy(),
                             (behav[['E', 'P', 'A']]).to_numpy(),
                             (modif2[['E', 'P', 'A']]).to_numpy(),
                             (ident2[['E', 'P', 'A']]).to_numpy()], axis=1)[0]
    indexx = torch.tensor([modif1['index_in_dic'].to_numpy()[0],
                           ident1['index_in_dic'].to_numpy()[0],
                           behav['index_in_dic'].to_numpy()[0],
                           modif2['index_in_dic'].to_numpy()[0],
                           ident2['index_in_dic'].to_numpy()[0]])
    ys = torch.tensor(values)
    inputs, masks = preprocessing_for_bert([sents])
    yield inputs, masks, ys, indexx
def sent_gen(sentence):
    # Encode an arbitrary sentence with dummy targets/indices (only the inputs are used downstream)
    sents = sentence
    indexx = torch.tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
    ys = torch.tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
    inputs, masks = preprocessing_for_bert([sents])
    yield inputs, masks, ys, indexx

def sent_ldr(sent2, batch_size=1):
    dt_ldr = [x for x in DataLoader([next(sent_gen(sent2)) for x in range(batch_size)], batch_size=batch_size)][0]
    return dt_ldr
def EPA_sents(sent):
    q = sent_ldr(sent)
    predictions = bert_predict(bert_regressor.to(device), q)
    # Map the 15 standardized outputs back to the original EPA scale
    df_out = pd.concat([pd.DataFrame(scaler_M.inverse_transform(predictions[:, 0:3].cpu())),
                        pd.DataFrame(scaler_I.inverse_transform(predictions[:, 3:6].cpu())),
                        pd.DataFrame(scaler_B.inverse_transform(predictions[:, 6:9].cpu())),
                        pd.DataFrame(scaler_M.inverse_transform(predictions[:, 9:12].cpu())),
                        pd.DataFrame(scaler_I.inverse_transform(predictions[:, 12:15].cpu()))
                        ], axis=1).set_axis(['EEMA', 'EPMA', 'EAMA',
                                             'EEA', 'EPA', 'EAA', 'EEB', 'EPB', 'EAB',
                                             'EEMO', 'EPMO', 'EAMO', 'EEO', 'EPO', 'EAO'],
                                            axis=1, inplace=False)
    return df_out.round(decimals=2)
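# Example (a sketch with a hypothetical event sentence):
#   EPA_sents("the kind mother comforts the frightened child")
#   # returns one row of estimated E/P/A values for the two modifiers, actor, behavior and object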
# Ref: https://stackoverflow.com/questions/28778668/freeze-header-in-pandas-dataframe
from ipywidgets import interact, IntSlider
from IPython.display import display
def freeze_header(df, num_rows=30, num_columns=10, step_rows=1,
                  step_columns=1):
    """
    Freeze the headers (column and index names) of a Pandas DataFrame. A widget
    enables sliding through the rows and columns.

    Parameters
    ----------
    df : Pandas DataFrame
        DataFrame to display
    num_rows : int, optional
        Number of rows to display
    num_columns : int, optional
        Number of columns to display
    step_rows : int, optional
        Step in the rows
    step_columns : int, optional
        Step in the columns

    Returns
    -------
    Displays the DataFrame with the widget
    """
    @interact(last_row=IntSlider(min=min(num_rows, df.shape[0]),
                                 max=df.shape[0],
                                 step=step_rows,
                                 description='rows',
                                 readout=False,
                                 disabled=False,
                                 continuous_update=True,
                                 orientation='horizontal',
                                 slider_color='purple'),
              last_column=IntSlider(min=min(num_columns, df.shape[1]),
                                    max=df.shape[1],
                                    step=step_columns,
                                    description='columns',
                                    readout=False,
                                    disabled=False,
                                    continuous_update=True,
                                    orientation='horizontal',
                                    slider_color='purple'))
    def _freeze_header(last_row, last_column):
        display(df.iloc[max(0, last_row - num_rows):last_row,
                        max(0, last_column - num_columns):last_column])
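# Example (a sketch, for use inside a Jupyter notebook):
#   freeze_header(get_output(batch_sz=100, batch_num=1), num_rows=20, num_columns=8)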