import logging

from transformers import PreTrainedTokenizer, AddedToken

logger = logging.getLogger(__name__)


class CustomTokenizer(PreTrainedTokenizer):
    """Minimal whitespace tokenizer built on the Hugging Face base class.

    Splits input text on runs of whitespace; no subword handling, no
    vocabulary lookup is performed by ``tokenize`` itself.
    """

    def __init__(self, vocab_file, **kwargs):
        """Create the tokenizer.

        Args:
            vocab_file: Path to the vocabulary file. The original code
                accepted this argument but silently discarded it; it is now
                stored on the instance so subclass machinery (e.g.
                ``save_vocabulary``) can reach it.
            **kwargs: Forwarded unchanged to ``PreTrainedTokenizer``.
        """
        # Set instance state before super().__init__, which may invoke
        # vocab-dependent hooks on the subclass during construction.
        # NOTE(review): whether the base class requires the vocab to be
        # loaded here depends on the transformers version — confirm.
        self.vocab_file = vocab_file
        super().__init__(**kwargs)
        # Library code should log, not print, so callers control verbosity.
        logger.debug("Initializing CustomTokenizer with vocab_file=%s", vocab_file)

    def tokenize(self, text):
        """Return *text* split on whitespace (empty tokens dropped).

        NOTE(review): HF convention is usually to override ``_tokenize``
        and let the base ``tokenize`` handle added tokens — confirm this
        direct override is intended.
        """
        logger.debug("Tokenizing text %s", text)
        # str.split() with no args splits on any whitespace run and
        # discards leading/trailing empties.
        return text.split()