|
|
|
|
|
from typing import Dict, Any |
|
|
|
def form_text(example: Dict[str, Any]) -> Dict[str, Any]:
    """
    Given an example from the glue mnli dataset, generate a prompt version example in the format:

    mnli hypothesis: {hypothesis} premise: {premise} target: {class_label}<|endoftext|>

    This format can be used to finetune the model as a Causal Language Model.

    The integer stored under ``example['label']`` is mapped to a class name as
    0 -> entailment, 1 -> neutral, 2 -> contradiction.

    Args:
        example: mapping with 'hypothesis', 'premise' and 'label' entries.
            It is modified in place: the prompt is stored under 'text'.

    Returns:
        The same ``example`` object, with the 'text' entry added.

    Raises:
        KeyError: if 'hypothesis', 'premise' or 'label' is missing.
        IndexError: if the label is not one of 0, 1 or 2.
    """
    class_names = ('entailment', 'neutral', 'contradiction')

    hypothesis = example['hypothesis']
    premise = example['premise']
    label = example['label']

    # Python sequences accept negative indices, so a label such as -1
    # (presumably how unlabeled rows are marked -- verify against the dataset)
    # would silently be turned into 'contradiction'. Reject it with the same
    # exception type that a too-large label already produces.
    if label < 0:
        raise IndexError(
            f'label must be in range 0..{len(class_names) - 1}, got {label!r}'
        )
    class_label = class_names[label]

    example['text'] = (
        f'mnli hypothesis: {hypothesis} premise: {premise} target: {class_label}<|endoftext|>'
    )
    return example
|
|
|
def split_text(example: Dict[str, Any]) -> Dict[str, Any]:
    """
    Separate a formatted prompt into its validation prompt and its class label.

    ``example['text']`` is expected to look like

    mnli hypothesis: {hypothesis} premise: {premise} target: {class_label}<|endoftext|>

    The text is cut at its last space. Everything before that space is stored
    under 'prompt_text' (the prompt without the target), and everything after
    it, with the end-of-text marker removed, is stored under 'class_label'.
    The ``example`` mapping is updated in place and returned.
    """
    eos_marker = '<|endoftext|>'

    # rpartition splits on the LAST space; when the text has no space at all,
    # the head is empty and the whole text ends up in the tail.
    head, _, tail = example['text'].rpartition(' ')

    example['prompt_text'] = head
    example['class_label'] = tail.replace(eos_marker, '')
    return example