# beds_pipeline_beds / functions.py
# Author: andreyunic23
# Commit 76d9a63: "Add application file"
# libraries
import pandas as pd
import numpy as np
import torch
from torch import cuda
from torch.nn import functional as F
#from sklearn.model_selection import train_test_split
import transformers
from transformers import (
AutoTokenizer,
AutoModelForSequenceClassification,
)
from sentence_transformers import SentenceTransformer
# classes and functions
#parte 1 ###########################################################################################################
#parte 1 ###########################################################################################################
def convert_label(lista):
    """Replace every textual label in ``lista`` with an integer class id, in place.

    ``'loss'`` becomes 0, ``'hazard'`` becomes 1 and any other value becomes 2.

    Args:
        lista: Mutable sequence of labels, indexable by position (e.g. a list).

    Returns:
        The same ``lista`` object, now holding the integer ids.
    """
    for position, label in enumerate(lista):
        if label == 'loss':
            code = 0
        elif label == 'hazard':
            code = 1
        else:
            # Everything that is neither 'loss' nor 'hazard' shares class 2.
            code = 2
        lista[position] = code
    return lista
def df_with_pred(labels, predictions, data):
    """Build a table pairing each requirement with its reference and predicted label.

    Args:
        labels: Iterable of reference labels.
        predictions: Iterable of predictions; every element must expose
            ``.item()`` (e.g. NumPy or torch scalars).
        data: Object whose ``id`` and ``req`` attributes support positional
            ``.iloc`` access (e.g. a DataFrame with those columns). Row ``i``
            is matched with the ``i``-th (label, prediction) pair.

    Returns:
        A ``pandas.DataFrame`` with columns ``['id', 'req', 'label', 'pred']``,
        one row per (label, prediction) pair.
    """
    rows = [
        [data.id.iloc[position], data.req.iloc[position], truth, guess.item()]
        for position, (truth, guess) in enumerate(zip(labels, predictions))
    ]
    return pd.DataFrame(rows, columns=['id', 'req', 'label', 'pred'])
#parte 2 ###########################################################################################################
#parte 2 ###########################################################################################################
# def organize_predictions_list(predicted, data):#data : ['id','req', 'label', 'pred']
# list_loss = []
# list_hazard = []
# list_constraint = []
# for x in range(len(predicted)):
# if(predicted[x] == 0):
# list_loss.append([data.id.iloc[x], data.req.iloc[x]])
# elif(predicted[x] == 1):
# list_hazard.append([data.id.iloc[x], data.req.iloc[x]])
# elif(predicted[x] == 2):
# list_constraint.append([data.id.iloc[x], data.req.iloc[x]])
# return pd.DataFrame(list_loss, columns=['id','req']), pd.DataFrame(list_hazard, columns=['id','req']), pd.DataFrame(list_constraint, columns=['id','req'])
def organize_step2_predictions(predictions, list_sentences):
    """Split sentences into two lists according to their prediction.

    Args:
        predictions: Iterable of predictions, consumed in lockstep with
            ``list_sentences``.
        list_sentences: Iterable of sentences.

    Returns:
        A ``(list_correct, list_incorrect)`` tuple: sentences whose prediction
        equals 0 go to the first list, all others to the second. Pairing stops
        at the shorter of the two inputs.
    """
    accepted, rejected = [], []
    for prediction, sentence in zip(predictions, list_sentences):
        bucket = accepted if prediction == 0 else rejected
        bucket.append(sentence)
    return accepted, rejected
def get_incorrect(predicted, data):
    """Collect the rows of ``data`` whose prediction equals 1.

    Args:
        predicted: Sequence of predictions, aligned by position with ``data``.
        data: Object whose ``id`` and ``req`` attributes support positional
            ``.iloc`` access (e.g. a DataFrame with those columns).

    Returns:
        A ``pandas.DataFrame`` with columns ``['id', 'req']`` holding the
        selected rows in their original order.
    """
    flagged_rows = [
        [data.id.iloc[position], data.req.iloc[position]]
        for position, outcome in enumerate(predicted)
        if outcome == 1
    ]
    return pd.DataFrame(flagged_rows, columns=['id', 'req'])
#parte 3 ###########################################################################################################
#parte 3 ###########################################################################################################
def format_examples(df):
    """Duplicate every item yielded by iterating ``df`` into a two-element list.

    Args:
        df: Any iterable; each yielded item becomes one example.

    Returns:
        A list of ``[item, item]`` lists, in iteration order.
    """
    return [[item, item] for item in df]
def check_similarity_return(list_incorrect, list_correct, model):
    """Return the 10 entries of ``list_correct`` most similar to a requirement.

    Args:
        list_incorrect: Object with ``id`` and ``req`` attributes supporting
            positional ``.iloc`` access and ``len()`` (e.g. a DataFrame with
            those columns).
        list_correct: Sequence of reference entries; the first element of each
            entry (``entry[0]``) is what gets reported in the result.
        model: Object exposing ``encode(...)`` and ``similarity(a, b)``
            (presumably a ``sentence_transformers.SentenceTransformer``).

    Returns:
        Up to 10 ``[id, similarity, entry[0]]`` lists, ordered from highest to
        lowest similarity. An empty list when ``list_incorrect`` has no rows.
    """
    # The reference corpus is encoded once and reused for every requirement.
    embeddings = model.encode(list_correct)

    # Start from an empty ranking so an empty ``list_incorrect`` returns []
    # instead of failing on an unbound name.
    sim_pair = []
    for row in range(len(list_incorrect)):
        req_id = list_incorrect.id.iloc[row]
        query = model.encode(list_incorrect.req.iloc[row])
        similarity = model.similarity(query, embeddings)
        # NOTE(review): each iteration replaces the previous ranking, so only
        # the last requirement's result is returned. check_similarity_return2
        # keeps a top 10 per requirement.
        sim_pair = [
            [req_id, sim, correct[0]]
            for sim, correct in zip(similarity[0].tolist(), list_correct)
        ]

    # Rank by the similarity score (index 1). Index 0 is the requirement id,
    # which is identical for every entry and therefore ranks nothing.
    sim_pair.sort(key=lambda entry: entry[1], reverse=True)
    return sim_pair[:10]
def check_similarity_return2(list_incorrect, list_correct, model):
    """For every requirement, list its 10 most similar ``list_correct`` entries.

    Args:
        list_incorrect: Object with ``id`` and ``req`` attributes supporting
            positional ``.iloc`` access and ``len()`` (e.g. a DataFrame with
            those columns).
        list_correct: Sequence of reference entries; the first element of each
            entry (``entry[0]``) is what gets reported in the result.
        model: Object exposing ``encode(...)`` and ``similarity(a, b)``
            (presumably a ``sentence_transformers.SentenceTransformer``).

    Returns:
        A flat list of ``[id, similarity, entry[0]]`` lists: up to 10 per
        requirement, highest similarity first, requirements in row order.
    """
    corpus_embeddings = model.encode(list_correct)
    collected = []
    for row in range(len(list_incorrect)):
        req_id = list_incorrect.id.iloc[row]
        query_embedding = model.encode(list_incorrect.req.iloc[row])
        scores = model.similarity(query_embedding, corpus_embeddings)[0].tolist()
        ascending = sorted(
            ([req_id, score, correct[0]] for score, correct in zip(scores, list_correct)),
            key=lambda entry: entry[1],
        )
        # Ascending sort followed by a full reversal: best score first, and
        # entries with equal scores come out in reverse corpus order.
        collected += ascending[::-1][:10]
    return collected
#parte 4 ###########################################################################################################
#parte 4 ###########################################################################################################
def list_erro_with_pred(results, data, sub):
    """Attach the predicted class and class probabilities to every requirement.

    Args:
        results: Object with a ``logits`` attribute supporting ``.cpu()``
            (presumably a transformers classification output - confirm).
        data: Object with iterable ``id`` and ``req`` attributes, aligned by
            position with the rows of the logits.
        sub: Currently unused. A commented-out variant of this function used
            it to translate the predicted index via ``sub[pred.item()]``.

    Returns:
        A list of ``[id, req, predicted_index, probabilities]`` lists, where
        ``predicted_index`` is the argmax over the last logits axis and
        ``probabilities`` is the softmax over that same axis as a plain list.
    """
    logits = results.logits.cpu()
    class_ids = np.argmax(logits, axis=-1)
    class_probs = F.softmax(logits, dim=-1)
    return [
        [req_id, req, class_id.item(), probs.tolist()]
        for req_id, req, class_id, probs in zip(data.id, data.req, class_ids, class_probs)
    ]
########################################################################
########################################################################
########################################################################
########################################################################
###########################################