from transformers import AutoTokenizer
from datasets import load_dataset

# Load the DistilBERT tokenizer and the raw WikiText-2 training split
tokenizer = AutoTokenizer.from_pretrained('distilbert-base-cased')
train_data = load_dataset('wikitext', 'wikitext-2-raw-v1', split='train')

# Pull one raw text sample from the dataset and tokenize it
# (the result holds the input_ids and attention_mask for the sample)
sample_text = train_data[10]['text']
tokenized_train_data = tokenizer(sample_text)
tokenized_train_data
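
# A minimal sanity check (assumes the cell above has run): convert the ids
# back to their string tokens to see how the sample was split, including the
# [CLS]/[SEP] special tokens the tokenizer adds around the text.
tokens = tokenizer.convert_ids_to_tokens(tokenized_train_data['input_ids'])
print(tokens[:20])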