from datasets import load_dataset
from transformers import TFAutoModelForSequenceClassification, AutoTokenizer
model = TFAutoModelForSequenceClassification.from_pretrained("bert-base-uncased")
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
# Both calls load SST-2: `ds` holds the full DatasetDict (unused below), while
# `sst2_dataset` is the GLUE train split that the rest of the script tokenizes.
ds = load_dataset("stanfordnlp/sst2")
sst2_dataset = load_dataset("glue", "sst2", split="train")
def encode(examples):
    # SST-2 is a single-sentence task: tokenize the "sentence" column
    # (this dataset has no "sentence1"/"sentence2" pair fields).
    return tokenizer(examples["sentence"], truncation=True, padding="max_length")
sst2_dataset = sst2_dataset.map(encode, batched=True)
sst2_dataset = sst2_dataset.map(lambda examples: {"labels": examples["label"]}, batched=True)
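# --- Not part of the original app.py: a hedged sketch of how the encoded split
# --- could be fed to the Keras API for fine-tuning. `prepare_tf_dataset`,
# --- `compile`, and `fit` are standard transformers/Keras calls; the batch size,
# --- learning rate, and epoch count are illustrative assumptions.
import tensorflow as tf

# Build a tf.data.Dataset from the tokenized split; columns the model does not
# accept are dropped, and "labels" is kept for loss computation.
tf_train = model.prepare_tf_dataset(
    sst2_dataset,
    batch_size=16,
    shuffle=True,
    tokenizer=tokenizer,
)

# Compiling without an explicit loss lets the transformers TF model fall back to
# its built-in sequence-classification loss.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=3e-5))
model.fit(tf_train, epochs=1)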