|
import pickle

from transformers import BertTokenizer
|
|
|
|
class Decoder:
    """Decode model predictions back into human-readable labels.

    Wraps two pickled label encoders (objects exposing an
    ``inverse_transform`` method, e.g. scikit-learn encoders) -- one for
    toxicity labels and one for emotion labels.
    """

    # Default locations of the pickled encoders (relative to the CWD).
    DEFAULT_TOXICITY_PATH = 'pipeline/preprocessing/encoder_toxicity.pkl'
    DEFAULT_EMOTION_PATH = 'pipeline/preprocessing/encoder_emotion.pkl'

    def __init__(self, toxicity_path=DEFAULT_TOXICITY_PATH,
                 emotion_path=DEFAULT_EMOTION_PATH):
        """Load both label encoders from disk.

        Args:
            toxicity_path: path to the pickled toxicity label encoder.
            emotion_path: path to the pickled emotion label encoder.

        Raises:
            FileNotFoundError: if either pickle file does not exist.

        NOTE(review): ``pickle.load`` can execute arbitrary code -- only
        load encoder files from a trusted source.
        """
        with open(toxicity_path, 'rb') as f:
            self.__encoder_toxicity = pickle.load(f)
        with open(emotion_path, 'rb') as f:
            self.__encoder_emotion = pickle.load(f)

    def toxicity(self, pred):
        """Map encoded toxicity predictions back to their original labels."""
        return self.__encoder_toxicity.inverse_transform(pred)

    def emotion(self, pred):
        """Map encoded emotion predictions back to their original labels."""
        return self.__encoder_emotion.inverse_transform(pred)
|
|
|
|
class Preprocessor:
    """Preprocess raw text into BERT token-id tensors for the model.

    Also owns a ``Decoder`` instance used to map encoded label
    predictions back to label strings.
    """

    # Fixed token-sequence length expected by the downstream model.
    DEFAULT_MAX_LENGTH = 65

    def __init__(self, is_multilingual=False):
        """Load the appropriate pretrained BERT tokenizer.

        Args:
            is_multilingual: when True, use the cased multilingual BERT
                vocabulary; otherwise use the uncased English one.
        """
        if is_multilingual:
            self.tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
        else:
            self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        # Decoder for turning encoded label predictions back into strings.
        # (Was previously annotated with a stray bare-string "comment",
        # which is a no-op statement in Python -- replaced with a real comment.)
        self.decoder = Decoder()

    def preprocess_text(self, text, max_length=DEFAULT_MAX_LENGTH):
        """Tokenize *text* into a fixed-length TensorFlow tensor of token ids.

        Args:
            text: the raw input string.
            max_length: pad/truncate target length; defaults to 65 to
                match the model's expected input size.

        Returns:
            A TensorFlow tensor of token ids (presumably shape
            ``(1, max_length)`` for a single string -- confirm against
            the model's input contract), padded/truncated with special
            tokens ([CLS]/[SEP]) added.
        """
        return self.tokenizer.encode(
            text,
            add_special_tokens=True,
            max_length=max_length,
            padding="max_length",
            truncation=True,
            return_attention_mask=False,
            return_tensors='tf',
        )