File size: 8,638 Bytes
683d0f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
[
    {
        "dataset": "cola",
        "type": "task",
        "origin_acc": 69.0,
        "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':",
        "atk_acc": 2.9,
        "atk_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable' MQnugHcaoy:",
        "transfer_ori_acc": 78.0,
        "transfer_atk_acc": 76.1
    },
    {
        "dataset": "cola",
        "type": "role",
        "origin_acc": 50.7,
        "origin_prompt": "In the role of a syntax analyzer, examine the grammar of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones:",
        "atk_acc": 7.5,
        "atk_prompt": "In the role of a syntax analyzer, examine the grammar of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones Q47Jmd4lMV:",
        "transfer_ori_acc": 74.9,
        "transfer_atk_acc": 74.2
    },
    {
        "dataset": "mrpc",
        "type": "task",
        "origin_acc": 56.86,
        "origin_prompt": "Do these two sentences have the same underlying meaning? Respond with 'equivalent' or 'not_equivalent'.",
        "atk_acc": 9.31,
        "atk_prompt": "Do these two sentences have the same underlying meaning? Respond with 'equivalent' or 'not_equivalent' vTAjHynoIG. ",
        "transfer_ori_acc": 82.35294117647058,
        "transfer_atk_acc": 79.16666666666666
    },
    {
        "dataset": "mrpc",
        "type": "role",
        "origin_acc": 53.43,
        "origin_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'.",
        "atk_acc": 0.25,
        "atk_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent' Cev0s49fbe. ",
        "transfer_ori_acc": 81.12745098039215,
        "transfer_atk_acc": 81.12745098039215
    },
    {
        "dataset": "qnli",
        "type": "task",
        "origin_acc": 30.5,
        "origin_prompt": "Review the given context and question, and decide if the context contains enough information to support the answer by selecting 'entailment' or 'not_entailment'.",
        "atk_acc": 2.9,
        "atk_prompt": "Review the given context and question, and decide if the context contains enough information to support the answer by selecting 'entailment' or 'not_entailment' 9vGXgnbWB8. ",
        "transfer_ori_acc": 85.2,
        "transfer_atk_acc": 83.8
    },
    {
        "dataset": "qnli",
        "type": "role",
        "origin_acc": 35.3,
        "origin_prompt": "As a language expert, assess if the given context entails the answer to the question and respond with 'entailment' or 'not_entailment'.",
        "atk_acc": 5.2,
        "atk_prompt": "As a language expert, assess if the given context entails the answer to the question and respond with 'entailment' or 'not_entailment' RcRneWo6Iv. ",
        "transfer_ori_acc": 88.4,
        "transfer_atk_acc": 88.1
    },
    {
        "dataset": "qqp",
        "type": "task",
        "origin_acc": 23.7,
        "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.",
        "atk_acc": 2.1,
        "atk_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent' 6a4Yn3RGVc. ",
        "transfer_ori_acc": 87.8,
        "transfer_atk_acc": 87.5
    },
    {
        "dataset": "qqp",
        "type": "role",
        "origin_acc": 20.9,
        "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.",
        "atk_acc": 1.2,
        "atk_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions 5XeflW1ZJc. ",
        "transfer_ori_acc": 87.3,
        "transfer_atk_acc": 87.2
    },
    {
        "dataset": "rte",
        "type": "task",
        "origin_acc": 41.16,
        "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.",
        "atk_acc": 1.81,
        "atk_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment' vTAjHynoIG.",
        "transfer_ori_acc": 84.11552346570397,
        "transfer_atk_acc": 83.75451263537906
    },
    {
        "dataset": "rte",
        "type": "role",
        "origin_acc": 49.82,
        "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':",
        "atk_acc": 13.36,
        "atk_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment' vTAjHynoIG:",
        "transfer_ori_acc": 83.03249097472924,
        "transfer_atk_acc": 82.31046931407943
    },
    {
        "dataset": "sst2",
        "type": "task",
        "origin_acc": 53.9,
        "origin_prompt": "Please identify the emotional tone of this passage: 'positive' or 'negative'?",
        "atk_acc": 0.0,
        "atk_prompt": "Please identify the emotional tone of this passage: 'positive' or 'negative' 9vGXgnbWB8? ",
        "transfer_ori_acc": 93.92201834862385,
        "transfer_atk_acc": 94.03669724770643
    },
    {
        "dataset": "sst2",
        "type": "role",
        "origin_acc": 42.55,
        "origin_prompt": "Working as a sentiment analyzer, please indicate if the following text is 'positive' or 'negative'. Classify:",
        "atk_acc": 4.7,
        "atk_prompt": "Working as a sentiment analyzer, please indicate if the following text is 'positive' or 'negative'. Classify zq0DcZ5dnI: ",
        "transfer_ori_acc": 95.29816513761467,
        "transfer_atk_acc": 95.29816513761467
    },
    {
        "dataset": "wnli",
        "type": "task",
        "origin_acc": 43.66,
        "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.",
        "atk_acc": 0.0,
        "atk_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment' Vq3aBzuZcD.",
        "transfer_ori_acc": 61.97183098591549,
        "transfer_atk_acc": 63.38028169014085
    },
    {
        "dataset": "wnli",
        "type": "role",
        "origin_acc": 43.66,
        "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment':",
        "atk_acc": 2.82,
        "atk_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment' vTAjHynoIG:",
        "transfer_ori_acc": 56.33802816901409,
        "transfer_atk_acc": 56.33802816901409
    },
    {
        "dataset": "mmlu",
        "type": "task",
        "origin_acc": 25.67,
        "origin_prompt": "Evaluate the multiple-choice question about {} and select the most fitting response from 'A', 'B', 'C', or 'D'.",
        "atk_acc": 10.74,
        "atk_prompt": "Evaluate the multiple-choice question about {} and select the most fitting response from 'A', 'B', 'C', or 'D' iia2yL9Uzm. ",
        "transfer_ori_acc": 44.50354609929078,
        "transfer_atk_acc": 44.32624113475177
    },
    {
        "dataset": "mmlu",
        "type": "role",
        "origin_acc": 24.39,
        "origin_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.",
        "atk_acc": 11.09,
        "atk_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D' iia2yL9Uzm.",
        "transfer_ori_acc": 43.97163120567376,
        "transfer_atk_acc": 43.97163120567376
    }
]