import datasets
from transformers import AutoTokenizer

# Load the training split of the Rotten Tomatoes movie-review dataset
dataset = datasets.load_dataset(  # <1>
    "rotten_tomatoes",  # <1>
    split="train",  # <1>
)  # <1>

# Load the tokenizer that matches the pretrained BERT checkpoint
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

# Tokenize the "text" column; map() is a method of the Dataset object,
# and batched=True feeds the tokenizer batches of examples for speed
dataset = dataset.map(  # <2>
    lambda examples: tokenizer(examples["text"]),  # <2>
    batched=True,  # <2>
)  # <2>
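
# Quick sanity check (illustrative addition, not part of the original
# listing): after map(), the dataset should keep its original columns
# and gain the tokenizer's outputs, e.g. "input_ids" and "attention_mask".
print(dataset.column_names)
print(dataset[0]["input_ids"][:10])  # first ten token ids of the first review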