# libraries
import pandas as pd
import numpy as np
import torch
from torch import cuda
from torch.nn import functional as F
#from sklearn.model_selection import train_test_split
import transformers
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
)
from sentence_transformers import SentenceTransformer
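# Hedged sketch, not part of the original pipeline: shows how the imported classes are
# typically wired together. The checkpoint names below are placeholders (assumptions only);
# the fine-tuned checkpoints actually used by this Space are not specified in this file.
def _load_models_example(clf_checkpoint='bert-base-uncased', st_checkpoint='all-MiniLM-L6-v2'):
    device = 'cuda' if cuda.is_available() else 'cpu'
    tokenizer = AutoTokenizer.from_pretrained(clf_checkpoint)
    # num_labels=3 matches the loss / hazard / constraint classes used in convert_label below
    classifier = AutoModelForSequenceClassification.from_pretrained(clf_checkpoint, num_labels=3).to(device)
    sim_model = SentenceTransformer(st_checkpoint, device=device)
    return tokenizer, classifier, sim_model, device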
# classes and functions
# part 1 ###########################################################################################################
# part 1 ###########################################################################################################
def convert_label(lista):
    # Map string labels to integer class ids: 'loss' -> 0, 'hazard' -> 1, anything else ('constraint') -> 2.
    for x in range(len(lista)):
        curr = lista[x]
        lista[x] = 0 if curr == 'loss' else 1 if curr == 'hazard' else 2
    return lista
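# Hedged usage sketch (added for illustration, not in the original): convert_label rewrites
# the list in place, mapping 'loss' -> 0, 'hazard' -> 1 and anything else -> 2.
def _example_convert_label():
    return convert_label(['loss', 'hazard', 'constraint', 'loss'])  # -> [0, 1, 2, 0]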
def df_with_pred(labels, predictions, data):
    # Build a DataFrame pairing each requirement with its true label and predicted class.
    lista = []
    cont = 0
    #predicted = np.argmax(results.logits.cpu(), axis=-1)
    for test, pred in zip(labels, predictions):
        lista.append([data.id.iloc[cont], data.req.iloc[cont], test, pred.item()])
        cont += 1
    return pd.DataFrame(lista, columns=['id', 'req', 'label', 'pred'])
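# Hedged usage sketch (not in the original): 'data' is assumed to be a DataFrame with 'id'
# and 'req' columns and 'classifier_outputs' the model output for data.req. Class ids are
# taken with argmax over the logits, as in the commented-out line above.
def _example_df_with_pred(classifier_outputs, labels, data):
    predictions = torch.argmax(classifier_outputs.logits.cpu(), dim=-1)
    return df_with_pred(labels, predictions, data)  # DataFrame with columns id, req, label, pred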
# part 2 ###########################################################################################################
# part 2 ###########################################################################################################
# def organize_predictions_list(predicted, data): # data : ['id','req', 'label', 'pred']
#     list_loss = []
#     list_hazard = []
#     list_constraint = []
#     for x in range(len(predicted)):
#         if(predicted[x] == 0):
#             list_loss.append([data.id.iloc[x], data.req.iloc[x]])
#         elif(predicted[x] == 1):
#             list_hazard.append([data.id.iloc[x], data.req.iloc[x]])
#         elif(predicted[x] == 2):
#             list_constraint.append([data.id.iloc[x], data.req.iloc[x]])
#     return pd.DataFrame(list_loss, columns=['id','req']), pd.DataFrame(list_hazard, columns=['id','req']), pd.DataFrame(list_constraint, columns=['id','req'])
def organize_step2_predictions(predictions, list_sentences):
    # Split sentences into those predicted as correct (class 0) and incorrect (any other class).
    list_correct = []
    list_incorrect = []
    for prediction, sentence in zip(predictions, list_sentences):
        if prediction == 0:
            list_correct.append(sentence)
        else:
            list_incorrect.append(sentence)
    return list_correct, list_incorrect
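# Hedged usage sketch (not in the original): splits sentences by their step-2 prediction,
# where class 0 is assumed to mean "correct" and anything else "incorrect".
def _example_organize_step2(step2_outputs, sentences):
    preds = torch.argmax(step2_outputs.logits.cpu(), dim=-1).tolist()
    correct, incorrect = organize_step2_predictions(preds, sentences)
    return correct, incorrect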
def get_incorrect(predicted, data):  # data : [id, req]
    # Collect the rows whose prediction is class 1 (incorrect) into a DataFrame.
    list_incorrect = []
    for x in range(len(predicted)):
        if predicted[x] == 1:
            list_incorrect.append([data.id.iloc[x], data.req.iloc[x]])
    return pd.DataFrame(list_incorrect, columns=['id', 'req'])
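# Hedged usage sketch (not in the original): 'data' is assumed to be a DataFrame with 'id'
# and 'req' columns aligned with 'predicted'; rows predicted as class 1 are kept.
def _example_get_incorrect(step2_outputs, data):
    predicted = torch.argmax(step2_outputs.logits.cpu(), dim=-1).tolist()
    return get_incorrect(predicted, data)  # DataFrame of the requirements flagged as incorrect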
# part 3 ###########################################################################################################
# part 3 ###########################################################################################################
def format_examples(df):
    # Pair each sentence with itself, producing the [sentence, sentence] format used by the similarity step below.
    examples = []
    for sentence in df:
        examples.append([sentence, sentence])
    return examples
def check_similarity_return(list_incorrect, list_correct, model):
    # Return the 10 correct sentences most similar to the first incorrect requirement.
    embeddings = model.encode(list_correct)
    for x in range(len(list_incorrect)):
        id = list_incorrect.id.iloc[x]
        sentence = list_incorrect.req.iloc[x]
        sentence = model.encode(sentence)
        similarity = model.similarity(sentence, embeddings)
        sim_pair = []
        for sim, correct in zip(similarity[0].tolist(), list_correct):
            sim_pair.append([id, sim, correct[0]])
        # Sort by similarity score (index 1), highest first, and return after the first incorrect sentence.
        sim_pair.sort(key=lambda pair: pair[1], reverse=True)
        return sim_pair[:10]
def check_similarity_return2(list_incorrect, list_correct, model):
    # For every incorrect requirement, keep the 10 most similar correct sentences.
    sim_pair = []
    embeddings = model.encode(list_correct)
    for x in range(len(list_incorrect)):
        id = list_incorrect.id.iloc[x]
        sentence = list_incorrect.req.iloc[x]
        sentence = model.encode(sentence)
        similarity = model.similarity(sentence, embeddings)
        temp_list = []
        for sim, correct in zip(similarity[0].tolist(), list_correct):
            temp_list.append([id, sim, correct[0]])
        # Sort by similarity score (index 1), highest first.
        temp_list.sort(key=lambda pair: pair[1], reverse=True)
        sim_pair.extend(temp_list[:10])
    return sim_pair
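# Hedged usage sketch (not in the original): 'incorrect_df' is a DataFrame with 'id' and
# 'req' columns (as returned by get_incorrect), and the correct sentences are wrapped with
# format_examples to match the [sentence, sentence] shape indexed above. model.similarity
# is assumed to be available (recent sentence-transformers 3.x releases).
def _example_similarity(incorrect_df, correct_sentences, sim_model):
    correct_pairs = format_examples(correct_sentences)
    return check_similarity_return2(incorrect_df, correct_pairs, sim_model)  # [[id, score, correct_sentence], ...]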
# part 4 ###########################################################################################################
# part 4 ###########################################################################################################
def list_erro_with_pred(results, data, sub):
    # For each requirement, record the predicted class id and the softmax probabilities.
    # 'sub' maps class ids back to label names (used only in the commented-out variant below).
    diff_label = []
    predicted = np.argmax(results.logits.cpu(), axis=-1)
    probabilidade = F.softmax(results.logits.cpu(), dim=-1)
    for id, req, pred, prob in zip(data.id, data.req, predicted, probabilidade):
        #diff_label.append([id, req, sub[pred.item()], prob.tolist()])
        diff_label.append([id, req, pred.item(), prob.tolist()])
    return diff_label
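# Hedged usage sketch (not in the original): runs the classifier on data.req and feeds the raw
# outputs to list_erro_with_pred. 'tokenizer', 'classifier', 'device' and the default 'sub'
# label names are assumptions; the real objects live elsewhere in the app.
def _example_list_erro_with_pred(tokenizer, classifier, device, data, sub=('loss', 'hazard', 'constraint')):
    inputs = tokenizer(list(data.req), padding=True, truncation=True, return_tensors='pt').to(device)
    with torch.no_grad():
        results = classifier(**inputs)
    return list_erro_with_pred(results, data, sub)  # [[id, req, predicted_class, probabilities], ...]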