File size: 901 Bytes
03561be
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import json

from mmgpt.datasets.dolly_dataset import DollyDataset


class AlpacaGPT4Dataset(DollyDataset):
    """Instruction dataset read from an Alpaca-GPT4 style JSON annotation file.

    The annotation file is a JSON list of records, each carrying the keys
    ``instruction``, ``input`` and ``output``:

    ```json
    [
        {
            "instruction": "Identify the odd one out.",
            "input": "Twitter, Instagram, Telegram",
            "output": "The odd one out is Telegram. Twitter and Instagram are social media platforms mainly for sharing information, images and videos while Telegram is a cloud-based instant messaging and voice-over-IP service."
        }
    ]
    ```

    Only annotation loading and per-record text processing are overridden
    here; everything else comes from ``DollyDataset``.
    """

    def load_annotation(self, ann_path):
        """Parse the JSON file at ``ann_path`` and store it on ``self.annotation``.

        Args:
            ann_path: Path of the JSON annotation file.

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If the file content is not valid JSON.
        """
        # A context manager guarantees the handle is closed even when parsing
        # fails; JSON text is UTF-8, so do not depend on the locale default.
        with open(ann_path, "r", encoding="utf-8") as ann_file:
            self.annotation = json.load(ann_file)

    def process_text(self, ann):
        """Turn one annotation record into an instruction/answer pair.

        Args:
            ann: Mapping with the keys ``instruction``, ``input`` and
                ``output``.

        Returns:
            A dict with ``instruction`` set to the result of
            ``self.prompter(instruction=..., input=...)`` and ``answer`` set
            to the record's ``output`` value.

        Raises:
            KeyError: If any of the three keys is missing from ``ann``.
        """
        # NOTE(review): ``self.prompter`` is not defined in this class; it is
        # presumably set up by DollyDataset -- confirm against the base class.
        raw_instruction = ann["instruction"]
        # Named ``context`` locally so the builtin ``input`` is not shadowed;
        # the keyword passed to the prompter is still ``input``.
        context = ann["input"]
        answer = ann["output"]
        prompt = self.prompter(instruction=raw_instruction, input=context)
        return dict(instruction=prompt, answer=answer)