## Import required libraries
from datetime import datetime
import numpy as np
import pandas as pd
import random
from transformers import BertTokenizer, BertModel
import logging
import matplotlib.pyplot as plt
tokenizer = BertTokenizer.from_pretrained('bert-large-uncased')  # needed by load_dictionary below
import itertools
from sklearn.preprocessing import StandardScaler
from itertools import cycle,islice
from random import sample
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
import torch.nn.functional as F
if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f'There are {torch.cuda.device_count()} GPU(s) available.')
    print('Device name:', torch.cuda.get_device_name(0))
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")
# Load and normalize the reference EPA dictionaries
def load_dictionary(file):
    df = pd.read_csv(file).reset_index().rename(columns={"index": 'index_in_dic'})
    df['term2'] = df['term']
    df.term = df.term.str.replace("_", " ")
    df['len_Bert'] = df.apply(lambda x: len(tokenizer.tokenize(x['term'])), axis=1)
    # df = add_cluster(df)
    return df
Modifiers = load_dictionary("FullSurveyorInteract_Modifiers.csv")
Behaviors = load_dictionary("FullSurveyorInteract_Behaviors.csv")
Identities = load_dictionary("FullSurveyorInteract_Identities.csv")
# Standardize the E/P/A ratings of each dictionary with its own scaler
n_Modifiers = Modifiers.copy()
n_Behaviors = Behaviors.copy()
n_Identities = Identities.copy()
scaler_B = StandardScaler()
scaler_M = StandardScaler()
scaler_I = StandardScaler()
n_Behaviors[['E', 'P', 'A']] = scaler_B.fit_transform(Behaviors[['E', 'P', 'A']])
n_Modifiers[['E', 'P', 'A']] = scaler_M.fit_transform(Modifiers[['E', 'P', 'A']])
n_Identities[['E', 'P', 'A']] = scaler_I.fit_transform(Identities[['E', 'P', 'A']])
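# Note: the three CSVs are assumed to contain at least a 'term' column and numeric
# 'E', 'P', 'A' ratings (plus 'E2', 'P2', 'A2', judging by the placeholder row built
# in get_output_new below). The fitted scalers are reused later to map model
# predictions back to the original EPA scale, e.g. (a sketch):
#   scaler_I.inverse_transform(n_Identities[['E', 'P', 'A']].head(1))
# recovers the first identity's original E/P/A values.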
# Ref: https://mccormickml.com/2019/05/14/BERT-word-embeddings-tutorial/
rnd_st=42
# Reload the BERT tokenizer with explicit lower-casing
tokenizer = BertTokenizer.from_pretrained('bert-large-uncased', do_lower_case=True)
# Create a function to tokenize a set of texts
def preprocessing_for_bert(data, MAX_LEN=40):
    """Perform the required preprocessing steps for pretrained BERT.
    @param data (list of str): Texts to be processed (this file always passes a single-element list).
    @return input_ids (torch.Tensor): Tensor of token ids to be fed to the model.
    @return attention_masks (torch.Tensor): Tensor of indices specifying which
                  tokens should be attended to by the model.
    """
    # Create empty lists to store outputs
    input_ids = []
    attention_masks = []

    # For every sentence...
    for sent in data:
        # `encode_plus` will:
        #   (1) Tokenize the sentence
        #   (2) Add the `[CLS]` and `[SEP]` tokens to the start and end
        #   (3) Truncate/pad the sentence to the max length
        #   (4) Map tokens to their IDs
        #   (5) Create the attention mask
        #   (6) Return a dictionary of outputs
        encoded_sent = tokenizer.encode_plus(
            text=sent,                    # Preprocess sentence
            add_special_tokens=True,      # Add `[CLS]` and `[SEP]`
            max_length=MAX_LEN,           # Max length to truncate/pad
            padding='max_length',         # Pad sentence to max length
            # return_tensors='pt',        # Return PyTorch tensor
            return_attention_mask=True    # Return attention mask
        )

        # Add the outputs to the lists
        input_ids.append(encoded_sent.get('input_ids'))
        attention_masks.append(encoded_sent.get('attention_mask'))

    # Convert the (single) encoded sentence to tensors
    input_ids = torch.tensor(input_ids[0])
    attention_masks = torch.tensor(attention_masks[0])
    return input_ids, attention_masks
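# Example (hypothetical sentence; a sketch of how the encoder is used below):
#   ids, mask = preprocessing_for_bert(["kind mother comforts frightened child"])
#   ids.shape, mask.shape  -> torch.Size([40]), torch.Size([40])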
# # Convert other data types to torch.Tensor
# from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
def gnrtr2(Identity, Behavior, Modifier):
    # Sample one actor, one object, one behavior and two modifiers at random
    ident1 = Identity.sample(axis=0)
    ident2 = Identity.sample(axis=0)
    behav = Behavior.sample(axis=0)
    modif1 = Modifier.sample(axis=0)
    modif2 = Modifier.sample(axis=0)
    id1 = list(ident1.term)
    id2 = list(ident2.term)
    beh = list(behav.term)
    mod1 = list(modif1.term)
    mod2 = list(modif2.term)
    # Build the event sentence: modifier actor behavior modifier object
    sents = ' '.join(map(str, (mod1 + id1 + beh + mod2 + id2)))
    values = np.concatenate([(modif1[['E', 'P', 'A']]).to_numpy(),
                             (ident1[['E', 'P', 'A']]).to_numpy(),
                             (behav[['E', 'P', 'A']]).to_numpy(),
                             (modif2[['E', 'P', 'A']]).to_numpy(),
                             (ident2[['E', 'P', 'A']]).to_numpy()], axis=1)[0]
    indexx = torch.tensor([modif1['index_in_dic'].to_numpy()[0],
                           ident1['index_in_dic'].to_numpy()[0],
                           behav['index_in_dic'].to_numpy()[0],
                           modif2['index_in_dic'].to_numpy()[0],
                           ident2['index_in_dic'].to_numpy()[0]])
    ys = torch.tensor(values)
    inputs, masks = preprocessing_for_bert([sents])
    # data = TensorDataset(inputs, masks, ys)
    yield inputs, masks, ys, indexx
# For fine-tuning BERT, the authors recommend a batch size of 16 or 32.
def dta_ldr2(I, B, M, batch_size=32):
    dt_ldr = [x for x in DataLoader([next(gnrtr2(I, B, M)) for x in range(batch_size)], batch_size=batch_size)][0]
    return dt_ldr
# # Convert other data types to torch.Tensor
# For fine-tuning BERT, the authors recommend a batch size of 16 or 32.
def dta_ldr(I, B, M, batch_size=32):
    # Note: a `gnrtr` generator is not defined in this file; gnrtr2 is the sampling generator used here
    dt_ldr = [x for x in DataLoader([next(gnrtr2(I, B, M)) for x in range(batch_size)], batch_size=batch_size)][0]
    return dt_ldr
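# Example (a sketch): one training batch of 32 randomly composed events
#   ids, masks, ys, idx = dta_ldr(n_Identities, n_Behaviors, n_Modifiers, batch_size=32)
#   ids.shape -> torch.Size([32, 40])   (token ids)
#   ys.shape  -> torch.Size([32, 15])   (standardized E/P/A targets for the 5 terms)
#   idx.shape -> torch.Size([32, 5])    (dictionary row indices)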
class BertRegressor(nn.Module):
    """BERT model for regression tasks."""

    def __init__(self, freeze_bert=False):
        """
        @param freeze_bert (bool): Set to `True` to freeze BERT and train only the regression head
        """
        super(BertRegressor, self).__init__()
        # Hidden size of BERT, hidden size of the regression head, and number of outputs
        D_in, H, D_out = 1024, 120, 15
        # Instantiate the BERT model
        self.bert = BertModel.from_pretrained('bert-large-uncased')
        # Instantiate the feed-forward regression head
        self.regressor = nn.Sequential(
            nn.Dropout(0.4),
            nn.Linear(D_in, H),
            nn.Dropout(0.3),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(H, D_out)
        )
        # for name, param in list(self.bert.named_parameters())[:-90]:  # -20 / -90 / -196 / -4 (very slow in training)
        #     print('I will be frozen: {}'.format(name))
        #     param.requires_grad = False
        # Freeze the BERT model
        if freeze_bert:
            for param in self.bert.parameters():
                param.requires_grad = False

    def forward(self, input_ids, attention_mask):
        """
        Feed the input to BERT and the regression head to compute predictions.
        @param input_ids (torch.Tensor): input tensor with shape (batch_size, max_length)
        @param attention_mask (torch.Tensor): attention mask with shape (batch_size, max_length)
        @return predictions (torch.Tensor): output tensor with shape (batch_size, D_out)
        """
        # Feed input to BERT
        outputs = self.bert(input_ids=input_ids,
                            attention_mask=attention_mask)
        # Extract the last hidden state of the `[CLS]` token for the regression task
        last_hidden_state_cls = outputs[0][:, 0, :]
        # Feed the `[CLS]` representation to the regression head
        predictions = self.regressor(last_hidden_state_cls)
        return predictions
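# A sketch of the regression head's output shape (hypothetical tensors):
#   model = BertRegressor()
#   preds = model(ids.unsqueeze(0), mask.unsqueeze(0))   # ids/mask from preprocessing_for_bert
#   preds.shape -> torch.Size([1, 15])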
from transformers import AdamW, get_linear_schedule_with_warmup
def initialize_model(epochs=4):
    """Initialize the BERT regressor, the optimizer and the learning rate scheduler."""
    # Instantiate the BERT regressor
    bert_regressor = BertRegressor(freeze_bert=False)
    # Tell PyTorch to run the model on the GPU (or the CPU fallback)
    bert_regressor.to(device)
    # Create the optimizer
    optimizer = AdamW(bert_regressor.parameters(),
                      lr=2e-5,             # Small learning rate
                      eps=1e-8,            # Default epsilon value
                      weight_decay=0.001   # Decoupled weight decay to apply
                      )
    # Total number of training steps
    total_steps = 100000  # len(train_dataloader) * epochs
    # Set up the learning rate scheduler
    scheduler = get_linear_schedule_with_warmup(optimizer,
                                                num_warmup_steps=0,   # Default value
                                                num_training_steps=total_steps)
    return bert_regressor, optimizer, scheduler
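# Typical usage (a sketch; `train` below reads `optimizer` and `scheduler` as module-level globals):
#   bert_regressor, optimizer, scheduler = initialize_model()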
import random
import time

# Specify the loss function
loss_fn = nn.MSELoss()

def set_seed(seed_value=42):
    """Set seeds for reproducibility."""
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)
def train(model, I_trn, B_trn, M_trn, I_tst, B_tst, M_tst,
          batch_size_tst=32, batch_size=50, batch_epochs=400, evaluation=False, batch_size_trn=32):
    """Train the BertRegressor model.

    Relies on the module-level `optimizer` and `scheduler` returned by initialize_model().
    """
    # Start training loop
    print("Start training...\n")

    # =======================================
    #               Training
    # =======================================
    # Print the header of the result table
    print(f" {'Batch':^5} | {'Train Loss':^12} | {'Val Loss':^10} | {'Elapsed':^9}")
    print("-" * 50)

    # Measure the elapsed time of each reporting window
    t0_batch = time.time()
    time_elapsed = 0

    # Reset tracking variables
    batch_loss, batch_counts = 0, 0

    # Put the model into training mode
    model.train()

    # For each batch of freshly sampled training data...
    for batch in range(batch_epochs):
        batch_counts += 1
        if batch == 456:  # hard stop used during experimentation
            break
        # Load the batch to the GPU
        b_input_ids, b_attn_mask, b_ys, _ = tuple(
            t.to(device) for t in dta_ldr(I=I_trn, B=B_trn, M=M_trn, batch_size=batch_size_trn))
        # Zero out any previously calculated gradients
        model.zero_grad()
        # Perform a forward pass
        preds = model(b_input_ids, b_attn_mask)
        # Compute the loss
        loss = loss_fn(preds.float(), b_ys.float())
        batch_loss += loss.item()
        # Perform a backward pass to calculate gradients
        loss.backward()
        # Clip the norm of the gradients to 1.0 to prevent "exploding gradients"
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        # Update parameters and the learning rate
        optimizer.step()
        scheduler.step()

        # Print the loss values and time elapsed every 50 batches
        if batch_counts % 50 == 0 and batch_counts != 0:
            # Calculate the time elapsed for the last 50 batches
            time_elapsed = time.time() - t0_batch
            # Measure the model's performance on the validation set
            val_loss = evaluate(model, Ie=I_tst, Be=B_tst, Me=M_tst, batch_size_e=batch_size_tst)
            print(f"{batch + 1:^7}|{batch_loss / batch_counts:^12.6f} | {val_loss:^10.6f} | {time_elapsed:^9.2f}")
            print("-" * 50)
            # Reset batch tracking variables
            batch_loss, batch_counts = 0, 0
            t0_batch = time.time()

    # =======================================
    #              Evaluation
    # =======================================
    if evaluation:
        # After training, measure the model's performance on the validation set
        val_loss = evaluate(model, Ie=I_tst, Be=B_tst, Me=M_tst, batch_size_e=batch_size_tst)
        if val_loss < 0.32:
            print('\n Consider this one with val:', val_loss, ' at:', batch, '\n')
        print("-" * 50)

    # Final report (guard against a zero batch count right after a reset)
    val_loss = evaluate(model, Ie=I_tst, Be=B_tst, Me=M_tst, batch_size_e=batch_size_tst)
    print(f"{batch + 1:^7}|{batch_loss / max(batch_counts, 1):^12.6f} | {val_loss:^10.6f} | {time_elapsed:^9.2f}")
    print("Training complete!")
def evaluate(model, Ie, Be, Me, batch_size_e):
    """Measure the model's performance on a freshly sampled validation batch."""
    # Put the model into evaluation mode; dropout layers are disabled at test time
    model.eval()

    # Tracking variables
    val_loss = []

    # For each batch in the validation set (a single sampled batch here)...
    for batch in range(1):
        # Load the batch to the GPU
        b_input_ids, b_attn_mask, b_ys, _ = tuple(t.to(device) for t in dta_ldr2(Ie, Be, Me, batch_size_e))
        # Compute predictions
        with torch.no_grad():
            preds = model(b_input_ids, b_attn_mask)
        # Compute the loss (cast both sides to float32, as in the training loop)
        loss = loss_fn(preds.float(), b_ys.float())
        val_loss.append(loss.item())

    # Compute the mean loss over the validation batch
    val_loss = np.mean(val_loss)
    return val_loss
# Load the trained weights and put the model in inference mode
bert_regressor = BertRegressor()
bert_regressor.load_state_dict(torch.load("MABMO_product", map_location=device))
bert_regressor.eval()
def bert_predict(model, test_dataloader):
    """Perform a forward pass on the trained BERT regressor to predict E/P/A values
    for the test batch.
    """
    # Put the model into evaluation mode; dropout layers are disabled at test time
    model.eval()

    all_preds = []

    # For each batch in the test set (a single pre-collated batch here)...
    for batch in range(1):
        # Load the batch to the GPU
        b_input_ids, b_attn_mask = tuple(t.to(device) for t in test_dataloader)[:2]
        # Compute predictions
        with torch.no_grad():
            preds = model(b_input_ids, b_attn_mask)
        all_preds.append(preds)

    # Concatenate predictions from each batch
    all_preds = torch.cat(all_preds, dim=0)
    return all_preds
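# Example (a sketch):
#   batch = dta_ldr2(n_Identities, n_Behaviors, n_Modifiers, batch_size=8)
#   preds = bert_predict(bert_regressor.to(device), batch)   # shape (8, 15)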
def out_df(data, predictions, df_beh=Behaviors, df_ident=Identities, df_mod=Modifiers):
    # Map standardized predictions and targets back to the original EPA scale and place
    # them side by side. The column names appear to follow the pattern 'EE../EP../EA..'
    # for estimated E/P/A and plain 'E../P../A..' for dictionary values, with suffixes
    # MA (actor modifier), A (actor), B (behavior), MO (object modifier), O (object).
    df2 = pd.concat([pd.DataFrame(scaler_M.inverse_transform(predictions[:, 0:3].cpu())),
                     pd.DataFrame(scaler_M.inverse_transform(data[2][:, 0:3])),
                     pd.DataFrame(scaler_I.inverse_transform(predictions[:, 3:6].cpu())),
                     pd.DataFrame(scaler_I.inverse_transform(data[2][:, 3:6])),
                     pd.DataFrame(scaler_B.inverse_transform(predictions[:, 6:9].cpu())),
                     pd.DataFrame(scaler_B.inverse_transform(data[2][:, 6:9])),
                     pd.DataFrame(scaler_M.inverse_transform(predictions[:, 9:12].cpu())),
                     pd.DataFrame(scaler_M.inverse_transform(data[2][:, 9:12])),
                     pd.DataFrame(scaler_I.inverse_transform(predictions[:, 12:15].cpu())),
                     pd.DataFrame(scaler_I.inverse_transform(data[2][:, 12:15])),
                     pd.DataFrame(np.array(data[3]))
                     ], axis=1).set_axis(['EEMA', 'EPMA', 'EAMA', 'EM1', 'PM1', 'AM1',
                                          'EEA', 'EPA', 'EAA', 'EA', 'PA', 'AA',
                                          'EEB', 'EPB', 'EAB', 'EB', 'PB', 'AB',
                                          'EEMO', 'EPMO', 'EAMO', 'EM2', 'PM2', 'AM2',
                                          'EEO', 'EPO', 'EAO', 'EO', 'PO', 'AO',
                                          'idx_ModA', 'idx_Act', 'idx_Beh', 'idx_ModO', 'idx_Obj'],
                                         axis=1, inplace=False)
    # Attach the original terms by joining on the dictionary row indices
    df2 = pd.merge(df2, df_mod[['term', 'index_in_dic']], left_on=['idx_ModA'], right_on=["index_in_dic"],
                   how='left').rename(columns={"term": 'ModA'}).drop(['index_in_dic'], axis=1)
    df2 = pd.merge(df2, df_ident[['term', 'index_in_dic']], left_on=['idx_Act'], right_on=["index_in_dic"],
                   how='left').rename(columns={"term": 'Actor'}).drop(['index_in_dic'], axis=1)
    df2 = pd.merge(df2, df_beh[['term', 'index_in_dic']], left_on=['idx_Beh'], right_on=["index_in_dic"],
                   how='left').rename(columns={"term": 'Behavior'}).drop(['index_in_dic'], axis=1)
    df2 = pd.merge(df2, df_mod[['term', 'index_in_dic']], left_on=['idx_ModO'], right_on=["index_in_dic"],
                   how='left').rename(columns={"term": 'ModO'}).drop(['index_in_dic'], axis=1)
    df2 = pd.merge(df2, df_ident[['term', 'index_in_dic']], left_on=['idx_Obj'], right_on=["index_in_dic"],
                   how='left').rename(columns={"term": 'Object'}).drop(['index_in_dic'], axis=1)
    # Reorder columns: estimates first, then dictionary values, then the terms
    df2 = df2[['EEMA', 'EPMA', 'EAMA', 'EEA', 'EPA', 'EAA', 'EEB', 'EPB', 'EAB',
               'EEMO', 'EPMO', 'EAMO', 'EEO', 'EPO', 'EAO',
               'EM1', 'PM1', 'AM1', 'EA', 'PA', 'AA', 'EB', 'PB', 'AB',
               'EM2', 'PM2', 'AM2', 'EO', 'PO', 'AO',
               'ModA', 'Actor', 'Behavior', 'ModO', 'Object']]
    return df2
def get_output(I_b=n_Identities, B_b=n_Behaviors, M_b=n_Modifiers, batch_sz=3000, batch_num=10):
    df = pd.DataFrame()
    for i in range(batch_num):
        q = dta_ldr2(I=I_b, B=B_b, M=M_b, batch_size=batch_sz)
        preds = bert_predict(bert_regressor.to(device), q)
        df2 = out_df(data=q, predictions=preds)
        df = pd.concat([df, df2], axis=0)
    return df
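# Example (a sketch; the defaults generate 10 batches of 3000 random events):
#   results = get_output(batch_sz=100, batch_num=2)   # 200 scored events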
def gen_new(Identity, Behavior, Modifier, n_df, word_type):
    # Like gnrtr2, but one slot (identity, behavior or modifier) is drawn from n_df and the
    # remaining slots use fixed random_state seeds. Note this definition is overridden by
    # the unseeded version further below.
    if word_type == 'identity':
        ident1 = n_df.sample(axis=0, random_state=56)
    else:
        ident1 = Identity.sample(axis=0, random_state=6)
    ident2 = Identity.sample(axis=0, random_state=6)
    if word_type == 'behavior':
        behav = n_df.sample(axis=0, random_state=5)
    else:
        behav = Behavior.sample(axis=0, random_state=5)
    if word_type == 'modifier':
        modif1 = n_df.sample(axis=0, random_state=55)
    else:
        modif1 = Modifier.sample(axis=0)
    modif2 = Modifier.sample(axis=0, random_state=96)
    id1 = list(ident1.term)
    id2 = list(ident2.term)
    beh = list(behav.term)
    mod1 = list(modif1.term)
    mod2 = list(modif2.term)
    # wrdvc_ident1 = gs_model.get_vector((list(ident1.trm_org))[0], norm=True)
    sents = ' '.join(map(str, (mod1 + id1 + beh + mod2 + id2)))
    values = np.concatenate([(modif1[['E', 'P', 'A']]).to_numpy(),
                             (ident1[['E', 'P', 'A']]).to_numpy(),
                             (behav[['E', 'P', 'A']]).to_numpy(),
                             (modif2[['E', 'P', 'A']]).to_numpy(),
                             (ident2[['E', 'P', 'A']]).to_numpy()], axis=1)[0]
    indexx = torch.tensor([modif1['index_in_dic'].to_numpy()[0],
                           ident1['index_in_dic'].to_numpy()[0],
                           behav['index_in_dic'].to_numpy()[0],
                           modif2['index_in_dic'].to_numpy()[0],
                           ident2['index_in_dic'].to_numpy()[0]])
    ys = torch.tensor(values)
    inputs, masks = preprocessing_for_bert([sents])
    yield inputs, masks, ys, indexx
def ldr_new(I, B, M, N_df, WT, batch_size=32):
    dt_ldr = [x for x in DataLoader([next(gen_new(I, B, M, N_df, WT)) for x in range(batch_size)], batch_size=batch_size)][0]
    return dt_ldr
cols=['EEMA', 'EPMA', 'EAMA', 'EEA', 'EPA', 'EAA', 'EEB', 'EPB', 'EAB',
'EEMO', 'EPMO', 'EAMO', 'EEO', 'EPO', 'EAO', 'ModA', 'Actor', 'Behavior', 'ModO', 'Object']
def get_output_new(w, wt, I_b=n_Identities, B_b=n_Behaviors, M_b=n_Modifiers, batch_sz=300, batch_num=1, columnss=cols, cus_col=1):
    df = pd.DataFrame()
    for i in range(batch_num):
        # Wrap the new word `w` in a one-row dictionary entry with placeholder ratings
        new_df = pd.DataFrame({'index_in_dic': 1000, 'term': w, 'E': 10, 'P': 10, 'A': 10,
                               'E2': 10, 'P2': 10, 'A2': 10, 'term2': w, 'len_Bert': 3}, index=[0])
        q = ldr_new(I=I_b, B=B_b, M=M_b, N_df=new_df, WT=wt, batch_size=batch_sz)
        preds = bert_predict(bert_regressor.to(device), q)
        if wt == 'identity':
            df_identity = pd.concat([Identities, new_df], axis=0)
            df2 = out_df(data=q, predictions=preds, df_ident=df_identity)
            if cus_col:
                columnss = ['EEA', 'EPA', 'EAA', 'ModA', 'Actor', 'Behavior', 'ModO', 'Object']
        if wt == 'behavior':
            df_behavior = pd.concat([Behaviors, new_df], axis=0)
            df2 = out_df(data=q, predictions=preds, df_beh=df_behavior)
            if cus_col:
                columnss = ['EEB', 'EPB', 'EAB', 'ModA', 'Actor', 'Behavior', 'ModO', 'Object']
        if wt == 'modifier':
            df_modifier = pd.concat([Modifiers, new_df], axis=0)
            df2 = out_df(data=q, predictions=preds, df_mod=df_modifier)
            if cus_col:
                columnss = ['EEMA', 'EPMA', 'EAMA', 'ModA', 'Actor', 'Behavior', 'ModO', 'Object']
        df = pd.concat([df, df2], axis=0)
    return df[columnss]
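# Example (a sketch; 'influencer' is a hypothetical out-of-dictionary identity):
#   new_scores = get_output_new(w='influencer', wt='identity', batch_sz=50)
#   # EEA/EPA/EAA give the estimated E/P/A of the new identity in each sampled event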
def gen_new(Identity, Behavior, Modifier, n_df, word_type):
    # Redefinition of gen_new that draws every slot without fixed random_state seeds;
    # being defined later, this is the version ldr_new resolves at call time.
    if word_type == 'identity':
        ident1 = n_df.sample(axis=0)
    else:
        ident1 = Identity.sample(axis=0)
    ident2 = Identity.sample(axis=0)
    if word_type == 'behavior':
        behav = n_df.sample(axis=0)
    else:
        behav = Behavior.sample(axis=0)
    if word_type == 'modifier':
        modif1 = n_df.sample(axis=0)
    else:
        modif1 = Modifier.sample(axis=0)
    modif2 = Modifier.sample(axis=0)
    id1 = list(ident1.term)
    id2 = list(ident2.term)
    beh = list(behav.term)
    mod1 = list(modif1.term)
    mod2 = list(modif2.term)
    sents = ' '.join(map(str, (mod1 + id1 + beh + mod2 + id2)))
    values = np.concatenate([(modif1[['E', 'P', 'A']]).to_numpy(),
                             (ident1[['E', 'P', 'A']]).to_numpy(),
                             (behav[['E', 'P', 'A']]).to_numpy(),
                             (modif2[['E', 'P', 'A']]).to_numpy(),
                             (ident2[['E', 'P', 'A']]).to_numpy()], axis=1)[0]
    indexx = torch.tensor([modif1['index_in_dic'].to_numpy()[0],
                           ident1['index_in_dic'].to_numpy()[0],
                           behav['index_in_dic'].to_numpy()[0],
                           modif2['index_in_dic'].to_numpy()[0],
                           ident2['index_in_dic'].to_numpy()[0]])
    ys = torch.tensor(values)
    inputs, masks = preprocessing_for_bert([sents])
    yield inputs, masks, ys, indexx
def sent_gen(sentence):
    # Encode an arbitrary sentence with dummy targets/indices (only the inputs are used downstream)
    sents = sentence
    indexx = torch.tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
    ys = torch.tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
    inputs, masks = preprocessing_for_bert([sents])
    yield inputs, masks, ys, indexx

def sent_ldr(sent2, batch_size=1):
    dt_ldr = [x for x in DataLoader([next(sent_gen(sent2)) for x in range(batch_size)], batch_size=batch_size)][0]
    return dt_ldr
def EPA_sents(sent):
    q = sent_ldr(sent)
    predictions = bert_predict(bert_regressor.to(device), q)
    # Map the 15 standardized outputs back to the original EPA scale
    df_out = pd.concat([pd.DataFrame(scaler_M.inverse_transform(predictions[:, 0:3].cpu())),
                        pd.DataFrame(scaler_I.inverse_transform(predictions[:, 3:6].cpu())),
                        pd.DataFrame(scaler_B.inverse_transform(predictions[:, 6:9].cpu())),
                        pd.DataFrame(scaler_M.inverse_transform(predictions[:, 9:12].cpu())),
                        pd.DataFrame(scaler_I.inverse_transform(predictions[:, 12:15].cpu()))
                        ], axis=1).set_axis(['EEMA', 'EPMA', 'EAMA',
                                             'EEA', 'EPA', 'EAA', 'EEB', 'EPB', 'EAB',
                                             'EEMO', 'EPMO', 'EAMO', 'EEO', 'EPO', 'EAO'],
                                            axis=1, inplace=False)
    return df_out.round(decimals=2)
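# Example (a sketch with a hypothetical event sentence):
#   EPA_sents("the kind mother comforts the frightened child")
#   # returns one row of estimated E/P/A values for the two modifiers, actor, behavior and object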
# Ref: https://stackoverflow.com/questions/28778668/freeze-header-in-pandas-dataframe
from ipywidgets import interact, IntSlider
from IPython.display import display
def freeze_header(df, num_rows=30, num_columns=10, step_rows=1,
                  step_columns=1):
    """
    Freeze the headers (column and index names) of a Pandas DataFrame. A widget
    enables sliding through the rows and columns.

    Parameters
    ----------
    df : Pandas DataFrame
        DataFrame to display
    num_rows : int, optional
        Number of rows to display
    num_columns : int, optional
        Number of columns to display
    step_rows : int, optional
        Step in the rows
    step_columns : int, optional
        Step in the columns

    Returns
    -------
    Displays the DataFrame with the widget
    """
    @interact(last_row=IntSlider(min=min(num_rows, df.shape[0]),
                                 max=df.shape[0],
                                 step=step_rows,
                                 description='rows',
                                 readout=False,
                                 disabled=False,
                                 continuous_update=True,
                                 orientation='horizontal',
                                 slider_color='purple'),
              last_column=IntSlider(min=min(num_columns, df.shape[1]),
                                    max=df.shape[1],
                                    step=step_columns,
                                    description='columns',
                                    readout=False,
                                    disabled=False,
                                    continuous_update=True,
                                    orientation='horizontal',
                                    slider_color='purple'))
    def _freeze_header(last_row, last_column):
        display(df.iloc[max(0, last_row - num_rows):last_row,
                        max(0, last_column - num_columns):last_column])
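# Example (a sketch, for use inside a Jupyter notebook):
#   freeze_header(get_output(batch_sz=100, batch_num=1), num_rows=20, num_columns=8)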