File size: 386 Bytes
9ee675e
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
from transformers import ( 
    AutoTokenizer,
)


def load_tokenizer(model_tokenizer):
    """Instantiate and return a Hugging Face tokenizer.

    Args:
        model_tokenizer: Model name on the Hub or a local path accepted by
            ``AutoTokenizer.from_pretrained``.

    Returns:
        The loaded tokenizer instance.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_tokenizer)
    return tokenizer


def preprocessing_text(text, tokenizer, max_length=130):
    """Tokenize *text* into fixed-length PyTorch tensors.

    Args:
        text: The input string to tokenize.
        tokenizer: A Hugging Face tokenizer (e.g. from ``load_tokenizer``).
        max_length: Sequence length to pad/truncate to (default 130,
            preserving the original hard-coded value).

    Returns:
        A ``BatchEncoding`` dict with ``input_ids`` / ``attention_mask``
        tensors of shape (1, max_length).
    """
    # The original also passed pad_to_max_length=True, which is deprecated
    # in transformers and conflicts with padding='max_length' (same effect,
    # emits a warning). Only the modern kwarg is kept.
    return tokenizer.encode_plus(
        text,
        max_length=max_length,
        padding='max_length',
        truncation=True,
        return_tensors='pt',
    )