from transformers import ( | |
AutoTokenizer, | |
) | |
def load_tokenizer(model_tokenizer):
    """Instantiate and return a pretrained tokenizer.

    Args:
        model_tokenizer: Model identifier on the Hugging Face Hub, or a
            local directory path containing tokenizer files.

    Returns:
        The tokenizer loaded via ``AutoTokenizer.from_pretrained``.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_tokenizer)
    return tokenizer
def preprocessing_text(text, tokenizer, max_length=130):
    """Tokenize *text* into a fixed-length batch of PyTorch tensors.

    Args:
        text: Input string to tokenize.
        tokenizer: A Hugging Face tokenizer instance (e.g. from
            ``load_tokenizer``).
        max_length: Sequence length to pad/truncate to (default 130,
            matching the previous hard-coded value).

    Returns:
        A ``BatchEncoding`` with ``input_ids``/``attention_mask`` (and
        ``token_type_ids`` where applicable) as PyTorch tensors, each
        padded or truncated to ``max_length``.
    """
    # NOTE: the original also passed the deprecated `pad_to_max_length=True`,
    # which is redundant with (and superseded by) `padding='max_length'` and
    # raises a FutureWarning / conflicts in recent transformers releases.
    return tokenizer.encode_plus(
        text,
        max_length=max_length,
        padding='max_length',
        truncation=True,
        return_tensors='pt',
    )