Evaluate on MNLI:

from transformers import (
    default_data_collator,
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
)
from datasets import load_dataset

import functools

from utils import compute_metrics, preprocess_function

# Evaluate the fine-tuned checkpoint on both MNLI validation splits.

# Load the checkpoint and its tokenizer by model id.
model_name = "George-Ogden/gpt2-medium-finetuned-mnli"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# No eval_dataset is configured on the Trainer itself: each validation split
# is handed explicitly to trainer.evaluate() in the loop below.
trainer = Trainer(
    model=model,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    data_collator=default_data_collator,
)

# Apply the project's preprocess_function (with the tokenizer bound as its
# first argument) to every split of GLUE/MNLI, in batches.
raw_datasets = load_dataset(
    "glue",
    "mnli",
).map(functools.partial(preprocess_function, tokenizer), batched=True)

# Task labels paired positionally with the validation splits they refer to.
tasks = ["mnli", "mnli-mm"]
eval_datasets = [
    raw_datasets["validation_matched"],
    raw_datasets["validation_mismatched"],
]

# Evaluate each split exactly once.
for eval_dataset, task in zip(eval_datasets, tasks):
    metrics = trainer.evaluate(eval_dataset=eval_dataset)
    metrics["eval_samples"] = len(eval_dataset)

    if task == "mnli-mm":
        # Suffix the mismatched-split metric names so they cannot be confused
        # with the matched-split metrics.
        metrics = {k + "_mm": v for k, v in metrics.items()}

    # Trainer.log_metrics takes the split name as its first argument.
    trainer.log_metrics("eval", metrics)
Downloads last month: 13
Safetensors · Model size: 355M params · Tensor type: F32
Inference Examples
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social visibility and check back later, or deploy to Inference Endpoints (dedicated) instead.