# Load the training split of the raw WikiText-2 corpus and look at one record.
from transformers import AutoTokenizer
from datasets import load_dataset

# Request the 'train' split of the 'wikitext-2-raw-v1' configuration of the
# 'wikitext' dataset.
# NOTE(review): presumably this downloads and caches the data on first use;
# confirm against the `datasets` documentation for the installed version.
train_data = load_dataset('wikitext', 'wikitext-2-raw-v1', split='train')

# Look up the record at index 10. This is a bare expression: an interactive
# session (REPL / notebook) echoes the value, but a plain script run discards
# it without printing anything.
train_data[10]