File size: 4,187 Bytes
683d0f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
[
    {
        "dataset": "cola",
        "type": "translation",
        "origin_acc": 79.5,
        "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable':",
        "atk_acc": 25.0,
        "atk_prompt": "Classify the grammar of a given sentence into two categories, 'acceptable' or 'unacceptable', and judge its accuracy.",
        "transfer_ori_acc": 57.099999999999994,
        "transfer_atk_acc": 31.5
    },
    {
        "dataset": "mrpc",
        "type": "translation",
        "origin_acc": 76.5,
        "origin_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent'.",
        "atk_acc": 7.0,
        "atk_prompt": "As an expert in semantic comparison, assess the similarity between the following two sentences and determine whether they are 'equivalent' or 'not_equivalent'.",
        "transfer_ori_acc": 53.431372549019606,
        "transfer_atk_acc": 26.225490196078432
    },
    {
        "dataset": "qnli",
        "type": "translation",
        "origin_acc": 79.5,
        "origin_prompt": "Assess if the answer to the question can be logically concluded from the provided context by choosing 'entailment' or 'not_entailment'.",
        "atk_acc": 56.5,
        "atk_prompt": "Check the text link to the question and answer 'entailment' or 'not_entailment', depending on your language skills.",
        "transfer_ori_acc": 19.7,
        "transfer_atk_acc": 5.2
    },
    {
        "dataset": "qqp",
        "type": "translation",
        "origin_acc": 84.0,
        "origin_prompt": "Determine if the given pair of statements can be considered the same by responding with 'equivalent' or 'not_equivalent'.",
        "atk_acc": 3.0,
        "atk_prompt": "As a tool for determining the equivalence of problems, the problems are examined and their similarity is classified as 'equivalent' or 'not_equivalent'.",
        "transfer_ori_acc": 13.5,
        "transfer_atk_acc": 2.3
    },
    {
        "dataset": "rte",
        "type": "translation",
        "origin_acc": 84.0,
        "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.",
        "atk_acc": 39.0,
        "atk_prompt": "Determine whether the given sentences have a cause-effect relationship as an engagement analysis tool and categorize them as 'entailment' or 'not_entailment'.",
        "transfer_ori_acc": 41.15523465703971,
        "transfer_atk_acc": 22.382671480144403
    },
    {
        "dataset": "sst2",
        "type": "translation",
        "origin_acc": 96.0,
        "origin_prompt": "Read the provided excerpt and choose between 'positive' and 'negative' to describe its sentiment:",
        "atk_acc": 29.5,
        "atk_prompt": "In the context of textual analysis, what classification is this phrase between 'positive' and 'negative'?",
        "transfer_ori_acc": 27.981651376146786,
        "transfer_atk_acc": 44.8394495412844
    },
    {
        "dataset": "wnli",
        "type": "translation",
        "origin_acc": 67.61,
        "origin_prompt": "Examine the pair of sentences and determine if they exhibit entailment or not_entailment. Answer with either 'entailment' or 'not_entailment'.",
        "atk_acc": 26.76,
        "atk_prompt": "Using implication analysis, determine whether the given sentences have a cause-effect relationship and categorize them as 'entailment' or 'not_entailment'.",
        "transfer_ori_acc": 33.80281690140845,
        "transfer_atk_acc": 36.61971830985916
    },
    {
        "dataset": "mmlu",
        "type": "translation",
        "origin_acc": 64.0,
        "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.",
        "atk_acc": 49.5,
        "atk_prompt": "Choose the correct answer to the multiple-choice question below. Is '{}' an 'A', 'B', 'C', or 'D'.",
        "transfer_ori_acc": 19.326241134751772,
        "transfer_atk_acc": 19.680851063829788
    }
]