File size: 2,386 Bytes
8ff6144
 
 
 
 
 
 
 
 
 
 
 
70d2374
8ff6144
 
 
 
 
 
70d2374
1247c04
 
8ff6144
 
1247c04
 
 
 
 
 
8ff6144
1247c04
 
 
 
 
 
 
 
 
8ff6144
1247c04
 
 
 
8ff6144
1247c04
8ff6144
 
1247c04
 
 
 
 
8ff6144
 
1247c04
 
 
 
 
 
 
8ff6144
 
 
 
 
 
 
 
 
1247c04
 
 
8ff6144
 
 
 
 
70d2374
8ff6144
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from .artifact import Artifact


class Format(Artifact):
    """Common base class for format artifacts; adds nothing to ``Artifact``."""


class SizeLimitingFormat(Format):
    """A ``Format`` that carries an optional ``size_limiter`` artifact."""

    # No limiter is configured by default (None).
    size_limiter: Artifact = None


class ICLFormat(SizeLimitingFormat):
    """Render an instance, plus optional demonstrations, into one prompt string.

    The prompt built by :meth:`format` is laid out as::

        prefix
        [instruction_prefix + instruction + demo_separator]   (optional)
        [demo 1][demo 2]...                                   (optional)
        query
        suffix
    """

    # Text placed at the very beginning of the prompt.
    prefix: str = ""
    # Text placed before every source (demos and query alike).
    input_prefix: str = ""
    # Text placed after the input/output separator, before the answer slot.
    output_prefix: str = ""
    # Text placed between the output prefix and a demo's target.
    target_prefix: str = " "
    # Text placed before the instruction when it is emitted at the start.
    instruction_prefix: str = ""
    # Separator between a source and its output section.
    input_output_separator: str = "\n"
    # Separator appended after each demo and after the instruction.
    demo_separator: str = "\n\n"
    # Text placed at the very end of the prompt.
    suffix: str = ""
    # Emit the instruction right after ``prefix``.
    add_instruction_at_start: bool = True
    # Repeat the instruction inside the query, after all demos.
    add_instruction_after_demos: bool = False

    def single_source_str(self, source):
        """Return ``source`` wrapped with the input prefix, separator and output prefix."""
        return (
            self.input_prefix
            + source
            + self.input_output_separator
            + self.output_prefix
        )

    def single_source_str_with_instruction(self, source, instruction):
        """Return ``source`` rendered like :meth:`single_source_str`, preceded by ``instruction``.

        The instruction follows the input prefix and is separated from the
        source by ``demo_separator``.
        """
        return (
            self.input_prefix
            + instruction
            + self.demo_separator
            + source
            + self.input_output_separator
            + self.output_prefix
        )

    def format(self, instance, demos_instances=None):
        """Build the full prompt string for ``instance``.

        Args:
            instance: Mapping with a ``"source"`` entry and optionally an
                ``"instruction"`` entry. The ``"instruction"`` key, when
                present, is removed from ``instance`` (the mapping is mutated).
                ``instance["target"]`` is read only when a size limiter is
                configured and at least one demo is considered.
            demos_instances: Iterable of mappings, each with ``"source"`` and
                ``"target"`` entries. ``None`` means no demos.

        Returns:
            The assembled prompt string.

        Raises:
            AssertionError: If ``instance["instruction"]`` is present but ``None``.
        """
        if demos_instances is None:
            demos_instances = []
        source = self.prefix

        instruction = ""
        if "instruction" in instance:
            instruction = instance.pop("instruction")
            # Check the popped value itself; comparing the literal string
            # "instruction" with None is always true and validates nothing.
            assert (
                instruction is not None
            ), f"instruction field can not be none : {instance}"

        if self.add_instruction_at_start and instruction != "":
            source += self.instruction_prefix + instruction + self.demo_separator

        if self.add_instruction_after_demos and instruction != "":
            query_str = self.single_source_str_with_instruction(
                instance["source"], instruction
            )
        else:
            query_str = self.single_source_str(instance["source"])

        for demo_instance in demos_instances:
            demo_str = (
                self.single_source_str(demo_instance["source"])
                + self.target_prefix
                + demo_instance["target"]
                + self.demo_separator
            )

            if self.size_limiter is not None:
                # Skip this demo when the prompt so far, plus this demo, the
                # query and the instance's target, fails the limiter's check.
                # Later demos are still tried.
                if not self.size_limiter.check(
                    source + demo_str + query_str + instance["target"]
                ):
                    continue

            source += demo_str

        source += query_str
        source += self.suffix
        return source