File size: 12,167 Bytes
683d0f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
[
    {
        "dataset": "cola",
        "type": "task",
        "origin_acc": 80.5,
        "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable'.",
        "atk_acc": 78.5,
        "atk_prompt": "Examine the given sentence and decide if it is gramatically sound. nAswer with either 'Acceptable' or 'Unacceptable'. ",
        "transfer_ori_acc": 77.7,
        "transfer_atk_acc": 77.8
    },
    {
        "dataset": "cola",
        "type": "role",
        "origin_acc": 81.5,
        "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable'.",
        "atk_acc": 78.5,
        "atk_prompt": "Funtioning as a graMmar evaluation tool, analyzY the given sentence and decide if it is grammatically corLect, responding with 'acceptable' or 'unacceptable'. ",
        "transfer_ori_acc": 75.2,
        "transfer_atk_acc": 75.8
    },
    {
        "dataset": "mrpc",
        "type": "task",
        "origin_acc": 74.5,
        "origin_prompt": "Examine the pair of sentences and decide if their meanings are identical by answering with 'equivalent' or 'not_equivalent'.",
        "atk_acc": 65.5,
        "atk_prompt": "Examine the Kair of sentences and edcide if their meanijngs are ibentical by answering with 'equivalent' or 'not_equivalent'. ",
        "transfer_ori_acc": 82.84313725490196,
        "transfer_atk_acc": 79.90196078431373
    },
    {
        "dataset": "mrpc",
        "type": "role",
        "origin_acc": 72.0,
        "origin_prompt": "As a linguistic comparator, review the following pair of sentences and determine their semantic equivalence by choosing 'equivalent' or 'not_equivalent'.",
        "atk_acc": 66.5,
        "atk_prompt": "AV a linguistic comaprator, review the following pair of sentences and determine their semayntic equivalence by chhoosing 'equivalent' or 'not_equivalent'. ",
        "transfer_ori_acc": 79.90196078431373,
        "transfer_atk_acc": 79.16666666666666
    },
    {
        "dataset": "qnli",
        "type": "task",
        "origin_acc": 85.0,
        "origin_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment'.",
        "atk_acc": 77.5,
        "atk_prompt": "Examine the contedt and question, and determine if the cXontext logically implies the answer by selecting 'entailment' or 'not_entailment'. ",
        "transfer_ori_acc": 86.5,
        "transfer_atk_acc": 87.7
    },
    {
        "dataset": "qnli",
        "type": "role",
        "origin_acc": 86.0,
        "origin_prompt": "As a textual analyst, examine if the given context logically implies the answer to the question and indicate your decision with 'entailment' or 'not_entailment'.",
        "atk_acc": 75.0,
        "atk_prompt": "A a textual analyst, xamine if the gPven conSext logiclaly implies the asnwer to the qbuestion and indicate your decision with 'entailment' or 'not_entailment'. ",
        "transfer_ori_acc": 88.8,
        "transfer_atk_acc": 80.10000000000001
    },
    {
        "dataset": "qqp",
        "type": "task",
        "origin_acc": 82.0,
        "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.",
        "atk_acc": 80.0,
        "atk_prompt": "Can these two statements be considJred equal in meaning? Answer with 'equivalent' or 'not_equivalent'. ",
        "transfer_ori_acc": 87.6,
        "transfer_atk_acc": 87.9
    },
    {
        "dataset": "qqp",
        "type": "role",
        "origin_acc": 83.0,
        "origin_prompt": "Acting as a question equivalence instrument, determine if the provided questions are equivalent in meaning, answering with 'equivalent' for similar questions or 'not_equivalent' for dissimilar ones.",
        "atk_acc": 81.5,
        "atk_prompt": "Actinm as a question equivalence instrumetn, determine if the provided questions are equivalent in meaning, answering with 'equivalent' for similar questions or 'not_equivalent' for dissimilar ones. ",
        "transfer_ori_acc": 86.5,
        "transfer_atk_acc": 85.6
    },
    {
        "dataset": "rte",
        "type": "task",
        "origin_acc": 85.0,
        "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.",
        "atk_acc": 80.5,
        "atk_prompt": "Does the relationshBp between the given sentences repersent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'. ",
        "transfer_ori_acc": 83.75451263537906,
        "transfer_atk_acc": 82.67148014440433
    },
    {
        "dataset": "rte",
        "type": "role",
        "origin_acc": 84.5,
        "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.",
        "atk_acc": 81.0,
        "atk_prompt": "In your role as an entailment analysis toIol, aZssess the relationship between the given sentences and cassify it as 'entailment' or 'not_entailment'. ",
        "transfer_ori_acc": 83.03249097472924,
        "transfer_atk_acc": 82.31046931407943
    },
    {
        "dataset": "sst2",
        "type": "task",
        "origin_acc": 97.0,
        "origin_prompt": "Evaluate the sentiment of the given text and classify it as 'positive' or 'negative'.",
        "atk_acc": 94.0,
        "atk_prompt": "ENvaluate the senttment of the giOven text and classify it as 'positive' or 'negative'. ",
        "transfer_ori_acc": 94.61009174311926,
        "transfer_atk_acc": 95.29816513761467
    },
    {
        "dataset": "sst2",
        "type": "role",
        "origin_acc": 97.0,
        "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'.",
        "atk_acc": 94.0,
        "atk_prompt": "qAs a setniment classifier, determnne whether the following text is 'positive' or 'negative'. ",
        "transfer_ori_acc": 94.4954128440367,
        "transfer_atk_acc": 95.29816513761467
    },
    {
        "dataset": "wnli",
        "type": "task",
        "origin_acc": 67.61,
        "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.",
        "atk_acc": 64.79,
        "atk_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'  and true is true  and true is true  and true is true  and true is true  and true is true . ",
        "transfer_ori_acc": 64.7887323943662,
        "transfer_atk_acc": 38.028169014084504
    },
    {
        "dataset": "wnli",
        "type": "role",
        "origin_acc": 64.79,
        "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.",
        "atk_acc": 59.15,
        "atk_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'  and true is true  and true is true  and true is true  and true is true  and true is true . ",
        "transfer_ori_acc": 57.74647887323944,
        "transfer_atk_acc": 46.478873239436616
    },
    {
        "dataset": "mmlu",
        "type": "task",
        "origin_acc": 66.5,
        "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.",
        "atk_acc": 63.5,
        "atk_prompt": "ISn ralation to the mulitple-choice question on {}, pleare provide the accurate answer by choosing 'A', 'B', 'C', or 'D'. ",
        "transfer_ori_acc": 46.808510638297875,
        "transfer_atk_acc": 45.39007092198582
    },
    {
        "dataset": "mmlu",
        "type": "role",
        "origin_acc": 65.5,
        "origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.",
        "atk_acc": 62.5,
        "atk_prompt": "As someone well-versed in {}, please address the mulZtiple-choice question below by selecting 'A', 'B', 'C', or 'D'.",
        "transfer_ori_acc": 46.63120567375886,
        "transfer_atk_acc": 45.744680851063826
    },
    {
        "dataset": "squad_v2",
        "type": "task",
        "origin_acc": 73.25,
        "origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, reply with 'unanswerable'.",
        "atk_acc": 67.18,
        "atk_prompt": "Based on the goiven context, provide the best possible answer. IL there's no answer available in the coYtext, reply with 'unanswerable'.",
        "transfer_ori_acc": 87.04992770387507,
        "transfer_atk_acc": 87.54992770387507
    },
    {
        "dataset": "squad_v2",
        "type": "role",
        "origin_acc": 72.68,
        "origin_prompt": "As a subject matter expert, extract the correct answer from the context. If an answer is not present, indicate 'unanswerable'.",
        "atk_acc": 66.77,
        "atk_prompt": "s a subject matter eAxpert, extract the correc answer from the context. If an answer is not presnt, idicate 'unanswerable'.",
        "transfer_ori_acc": 87.04992770387507,
        "transfer_atk_acc": 87.14992770387508
    },
    {
        "dataset": "iwslt",
        "type": "task",
        "origin_acc": 0.23,
        "origin_prompt": "Given the sentence below, perform a machine translation from {} to {}.",
        "atk_acc": 0.23,
        "atk_prompt": "Given the sentence below, perform a mRachine traCslation from {} to {}. ",
        "transfer_ori_acc": 0.21292285800980326,
        "transfer_atk_acc": 0.21552262845169096
    },
    {
        "dataset": "iwslt",
        "type": "role",
        "origin_acc": 0.24,
        "origin_prompt": "Assuming the responsibilities of a professional translator, translate the subsequent text passage from {} to {}.",
        "atk_acc": 0.23,
        "atk_prompt": "Assuming the responsibilities of a professional transaltor, translate the subsequent text passage from {} to {}. ",
        "transfer_ori_acc": 0.22030199520456395,
        "transfer_atk_acc": 0.21973314066426236
    },
    {
        "dataset": "un_multi",
        "type": "task",
        "origin_acc": 0.35,
        "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}.",
        "atk_acc": 0.26,
        "atk_prompt": "Plese erform a translation of the Fiven sentence, converwing it from {} to {}. ",
        "transfer_ori_acc": 0.3076276888167265,
        "transfer_atk_acc": 0.3054838402284908
    },
    {
        "dataset": "un_multi",
        "type": "role",
        "origin_acc": 0.35,
        "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}.",
        "atk_acc": 0.35,
        "atk_prompt": "As a qualified translator, convert the given sentence from its original {} language to the targte language {}. ",
        "transfer_ori_acc": 0.31127691224033316,
        "transfer_atk_acc": 0.3078172023513576
    },
    {
        "dataset": "math",
        "type": "task",
        "origin_acc": 23.12,
        "origin_prompt": "Calculate the answer to the following math query about {}.",
        "atk_acc": 18.75,
        "atk_prompt": "Calculate the ansewr to the followiJg math query about {}. ",
        "transfer_ori_acc": 16.25,
        "transfer_atk_acc": 16.25
    },
    {
        "dataset": "math",
        "type": "role",
        "origin_acc": 23.12,
        "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}.",
        "atk_acc": 20.62,
        "atk_prompt": "As a mathematics insrtuctor, calculate the answer to the following problem Vrelated to {}. ",
        "transfer_ori_acc": 17.5,
        "transfer_atk_acc": 16.25
    }
]