File size: 1,419 Bytes
8ff6144
 
 
 
 
 
 
 
 
 
 
 
70d2374
8ff6144
 
 
 
 
 
70d2374
8ff6144
 
 
 
 
70d2374
8ff6144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70d2374
8ff6144
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
from .artifact import Artifact


class Format(Artifact):
    """Base artifact type for formats; defines no fields or behavior of its own."""


class SizeLimitingFormat(Format):
    """Format that carries an optional ``size_limiter`` artifact.

    This class only declares the attribute; it does not use it itself.
    """

    # Defaults to None, i.e. no limiter configured.
    size_limiter: Artifact = None


class ICLFormat(SizeLimitingFormat):
    """Render an instance, optionally preceded by demo instances, as one string.

    The string produced by ``format`` is laid out as::

        prefix
        [instruction_prefix + instruction + demo_separator]
        [demo source + target_prefix + demo target + demo_separator] ...
        query source
        suffix

    where every "source" is wrapped by ``single_source_str``.
    """

    # Text placed at the very beginning of the formatted string.
    prefix: str = ""
    # Text placed before each source (demos and the query alike).
    input_prefix: str = ""
    # Text placed after the input/output separator of each source.
    output_prefix: str = ""
    # Text placed between a rendered demo source and that demo's target.
    target_prefix: str = " "
    # Text placed before the instruction, when the instance has one.
    instruction_prefix: str = ""
    # Text placed between a source and ``output_prefix``.
    input_output_separator: str = "\n"
    # Text placed after the instruction and after every demo.
    demo_separator: str = "\n\n"
    # Text placed at the very end of the formatted string.
    suffix: str = ""

    def single_source_str(self, source: str) -> str:
        """Wrap ``source`` with the input prefix, the separator and the output prefix.

        Args:
            source: The source text of a demo or of the query instance.

        Returns:
            ``input_prefix + source + input_output_separator + output_prefix``.
        """
        return f"{self.input_prefix}{source}{self.input_output_separator}{self.output_prefix}"

    def format(self, instance, demos_instances=None):
        """Build the full formatted string for ``instance``.

        Args:
            instance: Mapping that must contain ``"source"``. If it contains
                ``"instruction"``, that key is removed from the mapping
                (in place) and its value is rendered near the start of the
                result. ``"target"`` is read only when a size limiter is
                configured and at least one demo is processed.
            demos_instances: Iterable of mappings with ``"source"`` and
                ``"target"`` keys, rendered in order before the query.
                ``None`` (the default) means no demos.

        Returns:
            The concatenated string: prefix, optional instruction block,
            the demos that were kept, the query source and the suffix.

        Raises:
            KeyError: If a required key is missing from ``instance`` or
                from a demo instance.
        """
        # A ``None`` sentinel replaces the former mutable ``[]`` default so no
        # list object is shared between calls.
        if demos_instances is None:
            demos_instances = ()

        source = self.prefix

        query_str = self.single_source_str(instance["source"])

        if "instruction" in instance:
            # NOTE: ``pop`` mutates the caller's instance; kept as-is because
            # callers may rely on the key being removed.
            instruction = instance.pop("instruction")
            source += self.instruction_prefix + instruction + self.demo_separator

        for demo_instance in demos_instances:
            demo_str = (
                self.single_source_str(demo_instance["source"])
                + self.target_prefix
                + demo_instance["target"]
                + self.demo_separator
            )

            # The limiter is asked about everything accumulated so far plus
            # this demo, the query and the instance's target. A rejected demo
            # is skipped, but later demos are still tried.
            if self.size_limiter is not None and not self.size_limiter.check(
                source + demo_str + query_str + instance["target"]
            ):
                continue

            source += demo_str

        source += query_str
        source += self.suffix

        return source