# File size: 1,200 Bytes (revision 31b0b7e)
# Copyright (c) 2022 Graphcore Ltd. All rights reserved.
from typing import Dict, Any
def form_text(example: Dict[str, Any]) -> Dict[str, Any]:
    """
    Given an example from the glue mnli dataset, generate a prompt version example in the format:
        mnli hypothesis: {hypothesis} premise: {premise} target: {class_label}<|endoftext|>
    This format can be used to finetune the model as a Causal Language Model.

    Args:
        example: Mapping with the keys 'hypothesis', 'premise' and 'label'.
            'label' is an integer index into
            ('entailment', 'neutral', 'contradiction').

    Returns:
        The same ``example`` object, updated in place with a new 'text' key
        holding the formatted prompt.

    Raises:
        KeyError: If 'hypothesis', 'premise' or 'label' is missing.
        ValueError: If 'label' is negative.
        IndexError: If 'label' is greater than 2.
    """
    class_names = ('entailment', 'neutral', 'contradiction')

    hypothesis = example['hypothesis']
    premise = example['premise']
    label = example['label']

    # Plain sequence indexing would accept a negative label and wrap around
    # (-1 -> 'contradiction'), silently producing a wrong target. Reject it
    # explicitly; this matters for examples that carry a negative placeholder
    # label (presumably unlabeled test data -- verify against the dataset).
    if label < 0:
        raise ValueError(
            f'mnli label must be in range 0..{len(class_names) - 1}, got {label}'
        )

    class_label = class_names[label]
    example['text'] = (
        f'mnli hypothesis: {hypothesis} premise: {premise} target: {class_label}<|endoftext|>'
    )
    return example
def split_text(example: Dict[str, Any]) -> Dict[str, Any]:
    """
    Given an example in the format
        mnli hypothesis: {hypothesis} premise: {premise} target: {class_label}<|endoftext|>
    split it in the prompt to be used for validation (excluding the target) and the class label.

    The text is cut at its last space character: everything before that space
    becomes the prompt, everything after it (with the '<|endoftext|>' marker
    removed) becomes the class label.

    Args:
        example: Mapping with a 'text' key holding the formatted string.

    Returns:
        The same ``example`` object, updated in place with two new keys:
        'prompt_text' (the text up to, but not including, the last space) and
        'class_label' (the trailing word without '<|endoftext|>').
        If 'text' contains no space at all, 'prompt_text' is the empty string
        and 'class_label' is the whole text minus the marker.

    Raises:
        KeyError: If 'text' is missing.
    """
    # rpartition splits on the LAST space, so spaces inside the hypothesis or
    # premise do not affect where the target is cut off.
    prompt_text, _, target = example['text'].rpartition(' ')
    example['prompt_text'] = prompt_text
    example['class_label'] = target.replace('<|endoftext|>', '')
    return example