# assignment/app.py
from transformers import AutoTokenizer
from datasets import load_dataset

# Load the DistilBERT tokenizer and the raw WikiText-2 training split.
tokenizer = AutoTokenizer.from_pretrained('distilbert-base-cased')
train_data = load_dataset('wikitext', 'wikitext-2-raw-v1', split='train')

# Tokenize a single example from the training split and show the result.
sample_text = train_data[10]['text']
tokenized_train_data = tokenizer(sample_text)
print(tokenized_train_data)
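
# Minimal sketch (an assumption, not part of the original script): tokenizing the
# whole split with Dataset.map, truncating each example to the model's maximum
# length. This is the usual next step if the full dataset is needed downstream.
tokenized_dataset = train_data.map(
    lambda batch: tokenizer(batch['text'], truncation=True),
    batched=True,
)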