File size: 1,200 Bytes
31b0b7e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# Copyright (c) 2022 Graphcore Ltd. All rights reserved.

from typing import Dict, Any

def form_text(example: Dict[str, Any]) -> Dict[str, Any]:
    """
    Given an example from the glue mnli dataset, generate a prompt version example in the format:
        mnli hypothesis: {hypothesis} premise: {premise} target: {class_label}<|endoftext|>
    This format can be used to finetune the model as a Causal Language Model.

    Args:
        example: Mapping with a 'hypothesis' entry, a 'premise' entry and an
            integer 'label' entry (0 = entailment, 1 = neutral, 2 = contradiction).

    Returns:
        The same `example` object (modified in place), with the generated
        prompt stored under the 'text' key.

    Raises:
        KeyError: If 'hypothesis', 'premise' or 'label' is missing.
        IndexError: If 'label' is not one of 0, 1 or 2.
    """
    class_names = ('entailment', 'neutral', 'contradiction')

    hypothesis = example['hypothesis']
    premise = example['premise']
    label = example['label']

    # Plain sequence indexing would accept negative labels and wrap around
    # (e.g. a -1 "unlabelled" placeholder would silently become
    # 'contradiction'), so reject them explicitly. Labels that are too large
    # already raise IndexError from the lookup below.
    if label < 0:
        raise IndexError(
            f'label {label} is out of range; expected 0, 1 or 2')
    class_label = class_names[label]

    example[
        'text'] = f'mnli hypothesis: {hypothesis} premise: {premise} target: {class_label}<|endoftext|>'
    return example

def split_text(example: Dict[str, Any]) -> Dict[str, Any]:
    """
    Split a prompt-formatted example into its validation prompt and its class label.

    The 'text' entry is expected to look like
        mnli hypothesis: {hypothesis} premise: {premise} target: {class_label}<|endoftext|>
    It is cut at its last space: everything before that space is stored under
    'prompt_text' (the target is excluded), and everything after it, with the
    '<|endoftext|>' marker removed, is stored under 'class_label'.

    If 'text' contains no space at all, 'prompt_text' is the empty string and
    the whole text (minus the marker) becomes 'class_label'.

    The `example` mapping is modified in place and returned.
    """
    end_marker = '<|endoftext|>'
    # rpartition yields (before, separator, after) around the LAST space.
    prompt, _sep, target = example['text'].rpartition(' ')
    example['prompt_text'] = prompt
    example['class_label'] = target.replace(end_marker, '')
    return example