# Copyright (c) 2022 Graphcore Ltd. All rights reserved.
from typing import Dict, Any

# Class names indexed by the integer label (0, 1, 2) stored in each example.
_MNLI_CLASS_LABELS = ('entailment', 'neutral', 'contradiction')


def form_text(example: Dict[str, Any]) -> Dict[str, Any]:
    """
    Given an example from the glue mnli dataset, generate a prompt version
    example in the format:

        mnli hypothesis: {hypothesis} premise: {premise} target: {class_label}<|endoftext|>

    This format can be used to finetune the model as a Causal Language Model.

    Args:
        example: Mapping with the keys 'hypothesis', 'premise' and 'label',
            where 'label' is an integer index in the range 0..2.

    Returns:
        The same ``example`` object, updated in place with a new 'text' key
        holding the formatted prompt.

    Raises:
        KeyError: If 'hypothesis', 'premise' or 'label' is missing.
        IndexError: If 'label' is outside 0..2.
    """
    hypothesis = example['hypothesis']
    premise = example['premise']
    label = example['label']

    # Plain list indexing would accept negative labels and wrap around, so a
    # label of -1 would silently be rendered as 'contradiction'. Reject
    # anything outside the valid range explicitly instead.
    if not 0 <= label < len(_MNLI_CLASS_LABELS):
        raise IndexError(
            f'mnli label must be in the range 0..{len(_MNLI_CLASS_LABELS) - 1}, got {label!r}'
        )
    class_label = _MNLI_CLASS_LABELS[label]

    example['text'] = (
        f'mnli hypothesis: {hypothesis} premise: {premise} target: {class_label}<|endoftext|>'
    )
    return example


def split_text(example: Dict[str, Any]) -> Dict[str, Any]:
    """
    Given an example in the format

        mnli hypothesis: {hypothesis} premise: {premise} target: {class_label}<|endoftext|>

    split it in the prompt to be used for validation (excluding the target)
    and the class label.

    Args:
        example: Mapping with a 'text' key holding the formatted string.

    Returns:
        The same ``example`` object, updated in place with:
          * 'prompt_text': everything before the last space in 'text'
            (empty if 'text' contains no space);
          * 'class_label': everything after the last space, with every
            '<|endoftext|>' occurrence removed.

    Raises:
        KeyError: If 'text' is missing.
    """
    # The class label is a single word, so the last space in the text
    # separates the prompt from the target.
    prompt, _, target = example['text'].rpartition(' ')
    example['prompt_text'] = prompt
    example['class_label'] = target.replace('<|endoftext|>', '')
    return example