March07 committed on
Commit
683d0f6
1 Parent(s): 7c9c0aa

add transferability information

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. app.py +58 -6
  2. results_transfer/chatgpt_google-flan-t5-large/bertattack_0_shot.json +242 -0
  3. results_transfer/chatgpt_google-flan-t5-large/bertattack_3_shot.json +242 -0
  4. results_transfer/chatgpt_google-flan-t5-large/checklist_0_shot.json +242 -0
  5. results_transfer/chatgpt_google-flan-t5-large/checklist_3_shot.json +242 -0
  6. results_transfer/chatgpt_google-flan-t5-large/deepwordbug_0_shot.json +242 -0
  7. results_transfer/chatgpt_google-flan-t5-large/deepwordbug_3_shot.json +242 -0
  8. results_transfer/chatgpt_google-flan-t5-large/stresstest_0_shot.json +242 -0
  9. results_transfer/chatgpt_google-flan-t5-large/stresstest_3_shot.json +242 -0
  10. results_transfer/chatgpt_google-flan-t5-large/textbugger_0_shot.json +242 -0
  11. results_transfer/chatgpt_google-flan-t5-large/textbugger_3_shot.json +242 -0
  12. results_transfer/chatgpt_google-flan-t5-large/textfooler_0_shot.json +242 -0
  13. results_transfer/chatgpt_google-flan-t5-large/textfooler_3_shot.json +242 -0
  14. results_transfer/chatgpt_google-flan-t5-large/translation_0_shot.json +122 -0
  15. results_transfer/chatgpt_google-flan-t5-large/translation_3_shot.json +122 -0
  16. results_transfer/chatgpt_google-flan-ul2/bertattack_0_shot.json +242 -0
  17. results_transfer/chatgpt_google-flan-ul2/bertattack_3_shot.json +242 -0
  18. results_transfer/chatgpt_google-flan-ul2/checklist_0_shot.json +242 -0
  19. results_transfer/chatgpt_google-flan-ul2/checklist_3_shot.json +242 -0
  20. results_transfer/chatgpt_google-flan-ul2/deepwordbug_0_shot.json +242 -0
  21. results_transfer/chatgpt_google-flan-ul2/deepwordbug_3_shot.json +242 -0
  22. results_transfer/chatgpt_google-flan-ul2/stresstest_0_shot.json +242 -0
  23. results_transfer/chatgpt_google-flan-ul2/stresstest_3_shot.json +242 -0
  24. results_transfer/chatgpt_google-flan-ul2/textbugger_0_shot.json +242 -0
  25. results_transfer/chatgpt_google-flan-ul2/textbugger_3_shot.json +242 -0
  26. results_transfer/chatgpt_google-flan-ul2/textfooler_0_shot.json +242 -0
  27. results_transfer/chatgpt_google-flan-ul2/textfooler_3_shot.json +242 -0
  28. results_transfer/chatgpt_google-flan-ul2/translation_0_shot.json +122 -0
  29. results_transfer/chatgpt_google-flan-ul2/translation_3_shot.json +122 -0
  30. results_transfer/chatgpt_vicuna-13b/bertattack_0_shot.json +162 -0
  31. results_transfer/chatgpt_vicuna-13b/bertattack_3_shot.json +162 -0
  32. results_transfer/chatgpt_vicuna-13b/checklist_0_shot.json +162 -0
  33. results_transfer/chatgpt_vicuna-13b/checklist_3_shot.json +162 -0
  34. results_transfer/chatgpt_vicuna-13b/deepwordbug_0_shot.json +162 -0
  35. results_transfer/chatgpt_vicuna-13b/deepwordbug_3_shot.json +162 -0
  36. results_transfer/chatgpt_vicuna-13b/stresstest_0_shot.json +162 -0
  37. results_transfer/chatgpt_vicuna-13b/stresstest_3_shot.json +162 -0
  38. results_transfer/chatgpt_vicuna-13b/textbugger_0_shot.json +162 -0
  39. results_transfer/chatgpt_vicuna-13b/textbugger_3_shot.json +162 -0
  40. results_transfer/chatgpt_vicuna-13b/textfooler_0_shot.json +162 -0
  41. results_transfer/chatgpt_vicuna-13b/textfooler_3_shot.json +162 -0
  42. results_transfer/chatgpt_vicuna-13b/translation_0_shot.json +82 -0
  43. results_transfer/chatgpt_vicuna-13b/translation_3_shot.json +82 -0
  44. results_transfer/google-flan-t5-large_chatgpt/bertattack_0_shot.json +242 -0
  45. results_transfer/google-flan-t5-large_chatgpt/bertattack_3_shot.json +242 -0
  46. results_transfer/google-flan-t5-large_chatgpt/checklist_0_shot.json +242 -0
  47. results_transfer/google-flan-t5-large_chatgpt/checklist_3_shot.json +242 -0
  48. results_transfer/google-flan-t5-large_chatgpt/deepwordbug_0_shot.json +242 -0
  49. results_transfer/google-flan-t5-large_chatgpt/deepwordbug_3_shot.json +242 -0
  50. results_transfer/google-flan-t5-large_chatgpt/stresstest_0_shot.json +242 -0
app.py CHANGED
@@ -1,9 +1,19 @@
1
- import streamlit as st
2
  from parse import retrieve
 
3
 
4
-
5
- def main():
6
- st.title("PromptBench")
 
 
 
 
 
 
 
 
 
7
 
8
  model_name = st.selectbox(
9
  "Select Model",
@@ -47,5 +57,47 @@ def main():
47
  st.write("Attack prompt: {}".format(result["attack prompt"]))
48
  st.write("Attack acc: {}".format(result["attack acc"]))
49
 
50
- if __name__ == "__main__":
51
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
  from parse import retrieve
3
+ from transfer import retrieve_transfer
4
 
5
+ def main():
6
+ st.sidebar.title("Choose Function")
7
+ function_choice = st.sidebar.radio("", ["PromptBench", "Retrieve Transferability Information"])
8
+
9
+ if function_choice == "PromptBench":
10
+ promptbench()
11
+
12
+ elif function_choice == "Retrieve Transferability Information":
13
+ retrieve_transferability_information()
14
+
15
+ def promptbench():
16
+ st.title("PromptBench")
17
 
18
  model_name = st.selectbox(
19
  "Select Model",
 
57
  st.write("Attack prompt: {}".format(result["attack prompt"]))
58
  st.write("Attack acc: {}".format(result["attack acc"]))
59
 
60
+
61
+ def retrieve_transferability_information():
62
+ st.title("Retrieve Transferability Information")
63
+ source_model_name = st.selectbox(
64
+ "Select Source Model",
65
+ options=["T5", "Vicuna", "UL2", "ChatGPT"],
66
+ index=0,
67
+ )
68
+
69
+ target_model_name = st.selectbox(
70
+ "Select Target Model",
71
+ options=["T5", "Vicuna", "UL2", "ChatGPT"],
72
+ index=0,
73
+ )
74
+
75
+ if source_model_name == target_model_name:
76
+ st.write("Source model and target model cannot be the same.")
77
+ return
78
+
79
+ attack_name = st.selectbox(
80
+ "Select Attack",
81
+ options=[
82
+ "BertAttack", "CheckList", "DeepWordBug", "StressTest", "TextFooler", "TextBugger", "Semantic"
83
+ ],
84
+ index=0,
85
+ )
86
+
87
+ shot = st.selectbox(
88
+ "Select Shot",
89
+ options=[0, 3],
90
+ index=0,
91
+ )
92
+
93
+ data = retrieve_transfer(source_model_name, target_model_name, attack_name, shot)
94
+ for d in data:
95
+ st.write(f"Dataset: {d['dataset']}")
96
+ st.write(f"Prompt Type: {d['type']}-oriented")
97
+ st.write(f"Origin prompt: {d['origin_prompt']}")
98
+ st.write(f"Attack prompt: {d['atk_prompt']}")
99
+ st.write(f"Source model: origin acc: {d['origin_acc']}, attack acc: {d['atk_acc']}")
100
+ st.write(f"Target model: origin acc: {d['transfer_ori_acc']}, attack acc: {d['transfer_atk_acc']}")
101
+
102
+ if __name__ == "__main__":
103
+ main()
results_transfer/chatgpt_google-flan-t5-large/bertattack_0_shot.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 78.5,
6
+ "origin_prompt": "Assess the grammatical structure of the given sentence and classify it as 'Acceptable' or 'Unacceptable':",
7
+ "atk_acc": 0.0,
8
+ "atk_prompt": "what the grammatical structure of the given sentence and consider it as 'Acceptable' or 'Unacceptable':",
9
+ "transfer_ori_acc": 75.5,
10
+ "transfer_atk_acc": 76.3
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 80.0,
16
+ "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:",
17
+ "atk_acc": 0.0,
18
+ "atk_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for so grammar or 'unacceptable' for abnormal grammar:",
19
+ "transfer_ori_acc": 75.5,
20
+ "transfer_atk_acc": 73.3
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 75.5,
26
+ "origin_prompt": "Examine the pair of sentences and decide if their meanings are identical by answering with 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 56.0,
28
+ "atk_prompt": "Examine the pairs of pronouns and evaluate if their meanings are different by answering with 'equivalent' or 'not_equivalent'. ",
29
+ "transfer_ori_acc": 82.1078431372549,
30
+ "transfer_atk_acc": 71.81372549019608
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 75.5,
36
+ "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 0.0,
38
+ "atk_prompt": "In your capacity as a semantic assessment tool, evaluate the provided tense and classify their constituents as 'equivalent' or 'not_equivalent'. ",
39
+ "transfer_ori_acc": 76.47058823529412,
40
+ "transfer_atk_acc": 79.41176470588235
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 79.0,
46
+ "origin_prompt": "Assess if the answer to the question can be logically concluded from the provided context by choosing 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 0.0,
48
+ "atk_prompt": "and if the answer to the question can be logically concluded from the provided context by avoiding 'entailment' or 'not_entailment'. ",
49
+ "transfer_ori_acc": 84.8,
50
+ "transfer_atk_acc": 31.5
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 79.5,
56
+ "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 0.0,
58
+ "atk_prompt": "As a textual inference expert, analysis if the answer to the question can be deduced from the provided citation and define 'entailment' or 'not_entailment'. ",
59
+ "transfer_ori_acc": 78.10000000000001,
60
+ "transfer_atk_acc": 60.5
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 79.5,
66
+ "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 1.0,
68
+ "atk_prompt": "Can these two constructions be considered equal in meaning? object with 'equivalent' or 'not_equivalent'. ",
69
+ "transfer_ori_acc": 87.8,
70
+ "transfer_atk_acc": 68.89999999999999
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 82.5,
76
+ "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.",
77
+ "atk_acc": 0.0,
78
+ "atk_prompt": "give an table for category comparison evaluation, consider the questions and determine if their concept is the same, responding with 'equivalent' for similarity questions or 'not_equivalent' for different knowledge. ",
79
+ "transfer_ori_acc": 87.3,
80
+ "transfer_atk_acc": 87.4
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 82.5,
86
+ "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.",
87
+ "atk_acc": 55.5,
88
+ "atk_prompt": "determine if the following singular of sentence indicating entailment or not_entailment. contrast with 'entailment' or 'not_entailment'.",
89
+ "transfer_ori_acc": 84.11552346570397,
90
+ "transfer_atk_acc": 82.31046931407943
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 81.5,
96
+ "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':",
97
+ "atk_acc": 37.5,
98
+ "atk_prompt": "at as an entailment classifier, know which the given type of verb displays entailment or not_entailment. object with 'entailment' or 'not_entailment':",
99
+ "transfer_ori_acc": 83.03249097472924,
100
+ "transfer_atk_acc": 81.2274368231047
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 90.0,
106
+ "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment:",
107
+ "atk_acc": 0.0,
108
+ "atk_prompt": "what this assertion and are whether it has a 'positive' or 'negative' sentiment: ",
109
+ "transfer_ori_acc": 95.06880733944955,
110
+ "transfer_atk_acc": 94.95412844036697
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 93.0,
116
+ "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:",
117
+ "atk_acc": 76.0,
118
+ "atk_prompt": "from as a sentiments identification tool, assess if the same word is 'positive' or 'negative'. function: ",
119
+ "transfer_ori_acc": 94.83944954128441,
120
+ "transfer_atk_acc": 93.92201834862385
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 69.01,
126
+ "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 47.89,
128
+ "atk_prompt": "Identify whether the given individual of verbs demonstrates entailment or not_entailment. have with 'entailment' or 'not_entailment'.",
129
+ "transfer_ori_acc": 59.154929577464785,
130
+ "transfer_atk_acc": 52.112676056338024
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 60.56,
136
+ "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':",
137
+ "atk_acc": 47.89,
138
+ "atk_prompt": "Working as an entailment classifier, identify whether the given corpus of verbs displays entailment or not_entailment. act with 'entailment' or 'not_entailment':",
139
+ "transfer_ori_acc": 57.74647887323944,
140
+ "transfer_atk_acc": 53.52112676056338
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 63.5,
146
+ "origin_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 14.5,
148
+ "atk_prompt": "1 the first multiple-choice question on {}, remove the following answer from the options 'A', 'B', 'C', or 'D'. ",
149
+ "transfer_ori_acc": 44.32624113475177,
150
+ "transfer_atk_acc": 44.148936170212764
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 62.0,
156
+ "origin_prompt": "Drawing upon your mastery of {}, please answer the multiple-choice question by selecting the correct option from 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 1.0,
158
+ "atk_prompt": "Drawing upon your mastery of {}, simply question the multiple-selecting question by selecting the wrong color from 'A', 'B', 'C', or 'D'.",
159
+ "transfer_ori_acc": 44.32624113475177,
160
+ "transfer_atk_acc": 25.354609929078016
161
+ },
162
+ {
163
+ "dataset": "squad_v2",
164
+ "type": "task",
165
+ "origin_acc": 72.47,
166
+ "origin_prompt": "Discover the best answer based on the context. If the context doesn't include an answer, respond with 'unanswerable'.",
167
+ "atk_acc": 2.76,
168
+ "atk_prompt": "challenge the best answer based on the context. however the context doesn't include an answer, play with 'unanswerable'.",
169
+ "transfer_ori_acc": 87.54992770387507,
170
+ "transfer_atk_acc": 87.64992770387508
171
+ },
172
+ {
173
+ "dataset": "squad_v2",
174
+ "type": "role",
175
+ "origin_acc": 65.63,
176
+ "origin_prompt": "Drawing on your expertise in the context, provide the most precise answer. If the answer is not in the context, respond with 'unanswerable'.",
177
+ "atk_acc": 1.86,
178
+ "atk_prompt": "Drawing on your technique in the frame, offer the most precise answer. saying the answer is not in the context, play with 'unanswerable'.",
179
+ "transfer_ori_acc": 88.04992770387507,
180
+ "transfer_atk_acc": 87.92265497660236
181
+ },
182
+ {
183
+ "dataset": "iwslt",
184
+ "type": "task",
185
+ "origin_acc": 0.22,
186
+ "origin_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context:",
187
+ "atk_acc": 0.08,
188
+ "atk_prompt": "Translate the designated sentence from {} to {} while defending the assumed meaning and context:",
189
+ "transfer_ori_acc": 0.12847010019593297,
190
+ "transfer_atk_acc": 0.13300283959211967
191
+ },
192
+ {
193
+ "dataset": "iwslt",
194
+ "type": "role",
195
+ "origin_acc": 0.22,
196
+ "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}:",
197
+ "atk_acc": 0.01,
198
+ "atk_prompt": "who on the role of a certified translated, request a translator of the defined sentence from {} to {}: ",
199
+ "transfer_ori_acc": 0.14266566803921382,
200
+ "transfer_atk_acc": 0.14227772806172803
201
+ },
202
+ {
203
+ "dataset": "un_multi",
204
+ "type": "task",
205
+ "origin_acc": 0.34,
206
+ "origin_prompt": "Translate the following text from the source language {} to the target language {}:",
207
+ "atk_acc": 0.18,
208
+ "atk_prompt": "Translate the by text from the source language {} to the english example {}:",
209
+ "transfer_ori_acc": 0.2655722295717415,
210
+ "transfer_atk_acc": 0.26924943953069924
211
+ },
212
+ {
213
+ "dataset": "un_multi",
214
+ "type": "role",
215
+ "origin_acc": 0.34,
216
+ "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}:",
217
+ "atk_acc": 0.08,
218
+ "atk_prompt": "of the role of a language expertise, define a machine and for the presented sentence, changing it from {} to {}: ",
219
+ "transfer_ori_acc": 0.26923226069781947,
220
+ "transfer_atk_acc": 0.26037565925935324
221
+ },
222
+ {
223
+ "dataset": "math",
224
+ "type": "task",
225
+ "origin_acc": 6.25,
226
+ "origin_prompt": "Resolve the following mathematical question associated with {}:",
227
+ "atk_acc": 0.62,
228
+ "atk_prompt": "Resolve the following mathematical answer along with {}:",
229
+ "transfer_ori_acc": 9.375,
230
+ "transfer_atk_acc": 13.750000000000002
231
+ },
232
+ {
233
+ "dataset": "math",
234
+ "type": "role",
235
+ "origin_acc": 5.0,
236
+ "origin_prompt": "In the position of a mathematics coach, figure out the answer to this math challenge involving {}:",
237
+ "atk_acc": 0.0,
238
+ "atk_prompt": "In the position of a mathematics coach, develop out the theme to this math challenge involving {}: ",
239
+ "transfer_ori_acc": 13.750000000000002,
240
+ "transfer_atk_acc": 4.375
241
+ }
242
+ ]
results_transfer/chatgpt_google-flan-t5-large/bertattack_3_shot.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 81.0,
6
+ "origin_prompt": "Analyze the provided sentence and classify its grammatical correctness as 'Acceptable' or 'Unacceptable'.",
7
+ "atk_acc": 59.0,
8
+ "atk_prompt": "Analyze the reported speech and classify its inclusive correcting as 'Acceptable' or 'Unacceptable'. ",
9
+ "transfer_ori_acc": 77.10000000000001,
10
+ "transfer_atk_acc": 74.1
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 81.0,
16
+ "origin_prompt": "In the role of a syntax analyzer, examine the grammar of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones.",
17
+ "atk_acc": 75.5,
18
+ "atk_prompt": "using the art of a level analyzeg, examine the grammar of the provided sentence and decide if it is genuine, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones. ",
19
+ "transfer_ori_acc": 74.2,
20
+ "transfer_atk_acc": 74.0
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 74.0,
26
+ "origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 47.5,
28
+ "atk_prompt": "Compare the following sentences and determine if they lack the same aspect marking by respectively with 'equivalent' or 'not_equivalent'. ",
29
+ "transfer_ori_acc": 80.3921568627451,
30
+ "transfer_atk_acc": 81.61764705882352
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 71.5,
36
+ "origin_prompt": "In the role of a textual comparison specialist, examine the given sentences and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 47.0,
38
+ "atk_prompt": "have the role of a textual comparison academic, examine the given sentences and determine if they share the same length, responding with 'equivalent' or 'not_equivalent'. ",
39
+ "transfer_ori_acc": 79.65686274509804,
40
+ "transfer_atk_acc": 76.22549019607843
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 84.0,
46
+ "origin_prompt": "Consider the context and question, and indicate if the answer can be logically deduced from the context by responding with 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 71.0,
48
+ "atk_prompt": "put the contexts and question, and be if the answer can be logically deduced from the question by stating with 'entailment' or 'not_entailment'. ",
49
+ "transfer_ori_acc": 85.39999999999999,
50
+ "transfer_atk_acc": 78.9
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 84.0,
56
+ "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 70.0,
58
+ "atk_prompt": "As a structural inference expert, analyze if the answer to the challenge can be deduced from the query audience and select 'entailment' or 'not_entailment'. ",
59
+ "transfer_ori_acc": 77.2,
60
+ "transfer_atk_acc": 82.8
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 82.5,
66
+ "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".",
67
+ "atk_acc": 78.5,
68
+ "atk_prompt": "Are the following versus answers equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\". ",
69
+ "transfer_ori_acc": 87.9,
70
+ "transfer_atk_acc": 87.7
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 83.5,
76
+ "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.",
77
+ "atk_acc": 78.0,
78
+ "atk_prompt": "and an instrument for question comparison analysis, consider the questions and establish if their topic is the same, responding with 'equivalent' for matching responses or 'not_equivalent' for between questions. ",
79
+ "transfer_ori_acc": 86.4,
80
+ "transfer_atk_acc": 86.5
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 85.0,
86
+ "origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.",
87
+ "atk_acc": 59.5,
88
+ "atk_prompt": "write the second statements and categorize their relationship as neither 'entailment' or 'not_entailment'. ",
89
+ "transfer_ori_acc": 84.83754512635379,
90
+ "transfer_atk_acc": 84.47653429602889
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 83.5,
96
+ "origin_prompt": "While performing entailment analysis, classify the relationship between the provided sentences as 'entailment' or 'not_entailment'.",
97
+ "atk_acc": 79.0,
98
+ "atk_prompt": "was performing entailment analysis, classify the relationship between the first phrase as 'entailment' or 'not_entailment'. ",
99
+ "transfer_ori_acc": 83.39350180505414,
100
+ "transfer_atk_acc": 83.39350180505414
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 95.5,
106
+ "origin_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative'.",
107
+ "atk_acc": 94.5,
108
+ "atk_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative' Q47Jmd4lMV. ",
109
+ "transfer_ori_acc": 93.69266055045871,
110
+ "transfer_atk_acc": 93.92201834862385
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 97.5,
116
+ "origin_prompt": "In the capacity of a sentiment classifier, decide whether the given quote is 'positive' or 'negative'.",
117
+ "atk_acc": 96.5,
118
+ "atk_prompt": "In the capacity of a sentiment classifier, decide whether the given quote is 'positive' or 'negative' 9Q3S1F94fE. ",
119
+ "transfer_ori_acc": 94.95412844036697,
120
+ "transfer_atk_acc": 94.95412844036697
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 69.01,
126
+ "origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 59.15,
128
+ "atk_prompt": "examine the paired actions and categorize their relationship as is 'entailment' or 'not_entailment'. ",
129
+ "transfer_ori_acc": 63.38028169014085,
130
+ "transfer_atk_acc": 66.19718309859155
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 66.2,
136
+ "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.",
137
+ "atk_acc": 56.34,
138
+ "atk_prompt": "In the role of a semantic relationship analyzer, define the connection between the given links and decide if they display entailment or not_entailment. Answer with 'entailment' or 'not_entailment'. ",
139
+ "transfer_ori_acc": 56.33802816901409,
140
+ "transfer_atk_acc": 66.19718309859155
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 66.5,
146
+ "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 11.5,
148
+ "atk_prompt": "at respond to the multiple-independent question about {} by selecting the inappropriate question: 'A', 'B', 'C', or 'D'. ",
149
+ "transfer_ori_acc": 45.21276595744681,
150
+ "transfer_atk_acc": 33.51063829787234
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 65.5,
156
+ "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 52.5,
158
+ "atk_prompt": "all your ability with {}, attend to the following multiple-select question by picking 'A', 'B', 'C', or 'D'.",
159
+ "transfer_ori_acc": 46.98581560283688,
160
+ "transfer_atk_acc": 45.92198581560284
161
+ },
162
+ {
163
+ "dataset": "squad_v2",
164
+ "type": "task",
165
+ "origin_acc": 72.25,
166
+ "origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, reply with 'unanswerable'.",
167
+ "atk_acc": 65.81,
168
+ "atk_prompt": "Based on the use context, provide the best possible answer. who there'tis no answer provided in the area, reply with 'unanswerable'.",
169
+ "transfer_ori_acc": 87.04992770387507,
170
+ "transfer_atk_acc": 87.54992770387507
171
+ },
172
+ {
173
+ "dataset": "squad_v2",
174
+ "type": "role",
175
+ "origin_acc": 72.45,
176
+ "origin_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable'.",
177
+ "atk_acc": 61.02,
178
+ "atk_prompt": "As a proficient expert in the given context, try for the most specific out. If the question lacks be search, end by as 'unanswerable'.",
179
+ "transfer_ori_acc": 87.31659437054175,
180
+ "transfer_atk_acc": 87.68023073417811
181
+ },
182
+ {
183
+ "dataset": "iwslt",
184
+ "type": "task",
185
+ "origin_acc": 0.24,
186
+ "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.",
187
+ "atk_acc": 0.21,
188
+ "atk_prompt": "Convert the following sentence from its actual {} language to the net language {}. ",
189
+ "transfer_ori_acc": 0.2240211126734209,
190
+ "transfer_atk_acc": 0.22296244610321242
191
+ },
192
+ {
193
+ "dataset": "iwslt",
194
+ "type": "role",
195
+ "origin_acc": 0.24,
196
+ "origin_prompt": "Assuming the responsibilities of a professional translator, translate the subsequent text passage from {} to {}.",
197
+ "atk_acc": 0.17,
198
+ "atk_prompt": "what the responsibilities of a professional scholar, translate the subsequent text passage from {} to {}. ",
199
+ "transfer_ori_acc": 0.22030199520456395,
200
+ "transfer_atk_acc": 0.22001437607394084
201
+ },
202
+ {
203
+ "dataset": "un_multi",
204
+ "type": "task",
205
+ "origin_acc": 0.36,
206
+ "origin_prompt": "Translate the following text from the source language {} to the target language {}.",
207
+ "atk_acc": 0.35,
208
+ "atk_prompt": "Translate the following area from the standard language {} to the target language {}. ",
209
+ "transfer_ori_acc": 0.3090347352409074,
210
+ "transfer_atk_acc": 0.30893566132252137
211
+ },
212
+ {
213
+ "dataset": "un_multi",
214
+ "type": "role",
215
+ "origin_acc": 0.35,
216
+ "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}.",
217
+ "atk_acc": 0.26,
218
+ "atk_prompt": "As a qualified proper, converts the target sentence from its original {} language to the english language {}. ",
219
+ "transfer_ori_acc": 0.31127691224033316,
220
+ "transfer_atk_acc": 0.306608167162218
221
+ },
222
+ {
223
+ "dataset": "math",
224
+ "type": "task",
225
+ "origin_acc": 22.5,
226
+ "origin_prompt": "Compute the result of this math task concerning {}.",
227
+ "atk_acc": 3.12,
228
+ "atk_prompt": "Compute the degree of this math task concerning {}. ",
229
+ "transfer_ori_acc": 16.25,
230
+ "transfer_atk_acc": 16.875
231
+ },
232
+ {
233
+ "dataset": "math",
234
+ "type": "role",
235
+ "origin_acc": 22.5,
236
+ "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}.",
237
+ "atk_acc": 7.5,
238
+ "atk_prompt": "as a mathematics instructor, evaluate the reply to the following problem and to {}. ",
239
+ "transfer_ori_acc": 17.5,
240
+ "transfer_atk_acc": 16.25
241
+ }
242
+ ]
results_transfer/chatgpt_google-flan-t5-large/checklist_0_shot.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 78.5,
6
+ "origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable':",
7
+ "atk_acc": 72.5,
8
+ "atk_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable' zexKUTWJYG:",
9
+ "transfer_ori_acc": 76.5,
10
+ "transfer_atk_acc": 75.5
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 79.0,
16
+ "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:",
17
+ "atk_acc": 45.0,
18
+ "atk_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar zexKUTWJYG:",
19
+ "transfer_ori_acc": 75.5,
20
+ "transfer_atk_acc": 74.1
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 75.5,
26
+ "origin_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 72.0,
28
+ "atk_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent' SvAp8RlOFn. ",
29
+ "transfer_ori_acc": 81.86274509803921,
30
+ "transfer_atk_acc": 82.35294117647058
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 75.5,
36
+ "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 73.5,
38
+ "atk_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent' Kw6nrs57gH. ",
39
+ "transfer_ori_acc": 76.47058823529412,
40
+ "transfer_atk_acc": 74.75490196078431
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 78.0,
46
+ "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 75.5,
48
+ "atk_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment' ItGDrrA1Sc. ",
49
+ "transfer_ori_acc": 90.4,
50
+ "transfer_atk_acc": 89.5
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 78.0,
56
+ "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 74.5,
58
+ "atk_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment' 6a4Yn3RGVc. ",
59
+ "transfer_ori_acc": 89.1,
60
+ "transfer_atk_acc": 88.7
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 81.5,
66
+ "origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 78.0,
68
+ "atk_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent' iia2yL9Uzm. ",
69
+ "transfer_ori_acc": 87.6,
70
+ "transfer_atk_acc": 87.7
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 83.0,
76
+ "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.",
77
+ "atk_acc": 80.5,
78
+ "atk_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions Cev0s49fbe. ",
79
+ "transfer_ori_acc": 87.3,
80
+ "transfer_atk_acc": 87.5
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 82.5,
86
+ "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.",
87
+ "atk_acc": 80.0,
88
+ "atk_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment' MSAddJ4D2a.",
89
+ "transfer_ori_acc": 84.11552346570397,
90
+ "transfer_atk_acc": 84.11552346570397
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 81.5,
96
+ "origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':",
97
+ "atk_acc": 80.0,
98
+ "atk_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment' 6a4Yn3RGVc:",
99
+ "transfer_ori_acc": 85.5595667870036,
100
+ "transfer_atk_acc": 85.92057761732852
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 93.0,
106
+ "origin_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative':",
107
+ "atk_acc": 90.0,
108
+ "atk_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative' AMsRIKZniY: ",
109
+ "transfer_ori_acc": 94.26605504587155,
110
+ "transfer_atk_acc": 94.38073394495413
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 92.5,
116
+ "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:",
117
+ "atk_acc": 92.0,
118
+ "atk_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify Cev0s49fbe: ",
119
+ "transfer_ori_acc": 94.83944954128441,
120
+ "transfer_atk_acc": 94.83944954128441
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 64.79,
126
+ "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.",
127
+ "atk_acc": 59.15,
128
+ "atk_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word zexKUTWJYG. ",
129
+ "transfer_ori_acc": 66.19718309859155,
130
+ "transfer_atk_acc": 66.19718309859155
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 63.38,
136
+ "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':",
137
+ "atk_acc": 57.75,
138
+ "atk_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment' 5XeflW1ZJc:",
139
+ "transfer_ori_acc": 54.929577464788736,
140
+ "transfer_atk_acc": 53.52112676056338
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 64.0,
146
+ "origin_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 58.0,
148
+ "atk_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D' Pbl3tYuoRo. ",
149
+ "transfer_ori_acc": 44.32624113475177,
150
+ "transfer_atk_acc": 44.50354609929078
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 61.5,
156
+ "origin_prompt": "As a skilled person in the field of {}, please respond to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 58.0,
158
+ "atk_prompt": "As a skilled person in the field of {}, please respond to the multiple-choice question by choosing 'A', 'B', 'C', or 'D' IhuBIhoPGc.",
159
+ "transfer_ori_acc": 44.680851063829785,
160
+ "transfer_atk_acc": 45.0354609929078
161
+ },
162
+ {
163
+ "dataset": "squad_v2",
164
+ "type": "task",
165
+ "origin_acc": 69.16,
166
+ "origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, respond with 'unanswerable'.",
167
+ "atk_acc": 64.99,
168
+ "atk_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, respond with 'unanswerable' ItGDrrA1Sc.",
169
+ "transfer_ori_acc": 87.04992770387507,
170
+ "transfer_atk_acc": 87.54992770387507
171
+ },
172
+ {
173
+ "dataset": "squad_v2",
174
+ "type": "role",
175
+ "origin_acc": 64.31,
176
+ "origin_prompt": "As an expert with a deep understanding of the context, find the best answer. If the context doesn't include an answer, say 'unanswerable'.",
177
+ "atk_acc": 60.2,
178
+ "atk_prompt": "As an expert with a deep understanding of the context, find the best answer. If the context doesn't include an answer, say 'unanswerable' d6ZQ3u0GBQ.",
179
+ "transfer_ori_acc": 88.04992770387507,
180
+ "transfer_atk_acc": 88.04992770387507
181
+ },
182
+ {
183
+ "dataset": "iwslt",
184
+ "type": "task",
185
+ "origin_acc": 0.22,
186
+ "origin_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context:",
187
+ "atk_acc": 0.21,
188
+ "atk_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context zq0DcZ5dnI:",
189
+ "transfer_ori_acc": 0.12847010019593297,
190
+ "transfer_atk_acc": 0.13452105947739143
191
+ },
192
+ {
193
+ "dataset": "iwslt",
194
+ "type": "role",
195
+ "origin_acc": 0.22,
196
+ "origin_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to {}:",
197
+ "atk_acc": 0.21,
198
+ "atk_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to SFPCqpiClT {}: ",
199
+ "transfer_ori_acc": 0.13904950536673938,
200
+ "transfer_atk_acc": 0.12181412545498613
201
+ },
202
+ {
203
+ "dataset": "un_multi",
204
+ "type": "task",
205
+ "origin_acc": 0.34,
206
+ "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}:",
207
+ "atk_acc": 0.33,
208
+ "atk_prompt": "Perform a machine translation for the provided sentence, changing it from {} to Kw6nrs57gH {}:",
209
+ "transfer_ori_acc": 0.26814405340158265,
210
+ "transfer_atk_acc": 0.26255854470098927
211
+ },
212
+ {
213
+ "dataset": "un_multi",
214
+ "type": "role",
215
+ "origin_acc": 0.34,
216
+ "origin_prompt": "While acting as an experienced language expert, translate the following text from {} to {}:",
217
+ "atk_acc": 0.33,
218
+ "atk_prompt": "While acting as an experienced language expert, translate the following text from {} to Kw6nrs57gH {}: ",
219
+ "transfer_ori_acc": 0.26489202618745367,
220
+ "transfer_atk_acc": 0.25410191629878426
221
+ },
222
+ {
223
+ "dataset": "math",
224
+ "type": "task",
225
+ "origin_acc": 6.88,
226
+ "origin_prompt": "Calculate the answer to the following math query about {}:",
227
+ "atk_acc": 5.0,
228
+ "atk_prompt": "Calculate the answer to the following math query about 6a4Yn3RGVc {}:",
229
+ "transfer_ori_acc": 13.125,
230
+ "transfer_atk_acc": 11.875
231
+ },
232
+ {
233
+ "dataset": "math",
234
+ "type": "role",
235
+ "origin_acc": 5.0,
236
+ "origin_prompt": "Embodying a mathematics advisor, find the answer to the following math problem with {}:",
237
+ "atk_acc": 2.5,
238
+ "atk_prompt": "Embodying a mathematics advisor, find the answer to the following math problem with 9vGXgnbWB8 {}: ",
239
+ "transfer_ori_acc": 11.25,
240
+ "transfer_atk_acc": 12.5
241
+ }
242
+ ]
results_transfer/chatgpt_google-flan-t5-large/checklist_3_shot.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 81.0,
6
+ "origin_prompt": "Examine the sentence and decide if its grammar is 'Acceptable' or 'Unacceptable'.",
7
+ "atk_acc": 78.0,
8
+ "atk_prompt": "Examine the sentence and decide if its grammar is 'Acceptable' or 'Unacceptable' 9Q3S1F94fE. ",
9
+ "transfer_ori_acc": 76.3,
10
+ "transfer_atk_acc": 76.6
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 81.5,
16
+ "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable'.",
17
+ "atk_acc": 79.5,
18
+ "atk_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable' 6a4Yn3RGVc. ",
19
+ "transfer_ori_acc": 75.2,
20
+ "transfer_atk_acc": 76.3
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 74.0,
26
+ "origin_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 68.5,
28
+ "atk_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent' LkF0FZxMZ4. ",
29
+ "transfer_ori_acc": 82.84313725490196,
30
+ "transfer_atk_acc": 82.59803921568627
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 72.0,
36
+ "origin_prompt": "As a linguistic comparator, review the following pair of sentences and determine their semantic equivalence by choosing 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 68.0,
38
+ "atk_prompt": "As a linguistic comparator, review the following pair of sentences and determine their semantic equivalence by choosing 'equivalent' or 'not_equivalent' bhrRSokrfa. ",
39
+ "transfer_ori_acc": 79.90196078431373,
40
+ "transfer_atk_acc": 80.63725490196079
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 84.5,
46
+ "origin_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 80.5,
48
+ "atk_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment' 9Q3S1F94fE. ",
49
+ "transfer_ori_acc": 86.5,
50
+ "transfer_atk_acc": 86.5
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 84.5,
56
+ "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 81.0,
58
+ "atk_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment' YHQiDfFhw4. ",
59
+ "transfer_ori_acc": 77.2,
60
+ "transfer_atk_acc": 74.2
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 82.0,
66
+ "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 79.0,
68
+ "atk_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent' xikCjkMydH. ",
69
+ "transfer_ori_acc": 87.6,
70
+ "transfer_atk_acc": 87.8
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 83.0,
76
+ "origin_prompt": "In the capacity of a question assessment system, indicate if the meaning of the provided questions is the same, responding with 'equivalent' or 'not_equivalent'.",
77
+ "atk_acc": 81.5,
78
+ "atk_prompt": "In the capacity of a question assessment system, indicate if the meaning of the provided questions is the same, responding with 'equivalent' or 'not_equivalent' 5XeflW1ZJc. ",
79
+ "transfer_ori_acc": 86.1,
80
+ "transfer_atk_acc": 85.9
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 84.5,
86
+ "origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.",
87
+ "atk_acc": 80.5,
88
+ "atk_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment' 5XeflW1ZJc. ",
89
+ "transfer_ori_acc": 84.83754512635379,
90
+ "transfer_atk_acc": 85.5595667870036
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 84.0,
96
+ "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.",
97
+ "atk_acc": 80.5,
98
+ "atk_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment' bhrRSokrfa. ",
99
+ "transfer_ori_acc": 83.03249097472924,
100
+ "transfer_atk_acc": 83.03249097472924
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 96.0,
106
+ "origin_prompt": "Read the provided excerpt and choose between 'positive' and 'negative' to describe its sentiment.",
107
+ "atk_acc": 77.5,
108
+ "atk_prompt": "go the read excerpt and be between 'positive' and 'negative' to describing its mood. ",
109
+ "transfer_ori_acc": 94.72477064220183,
110
+ "transfer_atk_acc": 94.61009174311926
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 97.5,
116
+ "origin_prompt": "In the capacity of a sentiment classifier, decide whether the given quote is 'positive' or 'negative'.",
117
+ "atk_acc": 13.5,
118
+ "atk_prompt": "in the capacity of a sentiment classizer, chose neither the said quotes is 'positive' or 'negative'. ",
119
+ "transfer_ori_acc": 94.95412844036697,
120
+ "transfer_atk_acc": 94.95412844036697
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 69.01,
126
+ "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 61.97,
128
+ "atk_prompt": "Indicate if the connection between the followping senZences is 'entailment' or 'not_entailment'. ",
129
+ "transfer_ori_acc": 64.7887323943662,
130
+ "transfer_atk_acc": 69.01408450704226
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 66.2,
136
+ "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.",
137
+ "atk_acc": 57.75,
138
+ "atk_prompt": "In your role as an entailment analysis tool, asress the rlationship between the given sentences and classify it as 'entailment' or 'not_entailment'. ",
139
+ "transfer_ori_acc": 57.74647887323944,
140
+ "transfer_atk_acc": 54.929577464788736
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 66.0,
146
+ "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 63.0,
148
+ "atk_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D' MQnugHcaoy. ",
149
+ "transfer_ori_acc": 45.21276595744681,
150
+ "transfer_atk_acc": 45.21276595744681
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 65.5,
156
+ "origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 63.0,
158
+ "atk_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D' mOJ9nEwT4f.",
159
+ "transfer_ori_acc": 46.63120567375886,
160
+ "transfer_atk_acc": 46.98581560283688
161
+ },
162
+ {
163
+ "dataset": "squad_v2",
164
+ "type": "task",
165
+ "origin_acc": 73.25,
166
+ "origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, reply with 'unanswerable'.",
167
+ "atk_acc": 71.09,
168
+ "atk_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, reply with 'unanswerable' ItGDrrA1Sc.",
169
+ "transfer_ori_acc": 87.04992770387507,
170
+ "transfer_atk_acc": 87.04992770387507
171
+ },
172
+ {
173
+ "dataset": "squad_v2",
174
+ "type": "role",
175
+ "origin_acc": 72.63,
176
+ "origin_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable'.",
177
+ "atk_acc": 63.07,
178
+ "atk_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable' XStsD2pUzu.",
179
+ "transfer_ori_acc": 87.31659437054175,
180
+ "transfer_atk_acc": 87.31659437054175
181
+ },
182
+ {
183
+ "dataset": "iwslt",
184
+ "type": "task",
185
+ "origin_acc": 0.24,
186
+ "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.",
187
+ "atk_acc": 0.22,
188
+ "atk_prompt": "Convert the following sentence from its original {} language to the target language 5XeflW1ZJc {}. ",
189
+ "transfer_ori_acc": 0.2240211126734209,
190
+ "transfer_atk_acc": 0.22156249809369105
191
+ },
192
+ {
193
+ "dataset": "iwslt",
194
+ "type": "role",
195
+ "origin_acc": 0.24,
196
+ "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}.",
197
+ "atk_acc": 0.23,
198
+ "atk_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to MQnugHcaoy {}. ",
199
+ "transfer_ori_acc": 0.22070346732323043,
200
+ "transfer_atk_acc": 0.2112357399871226
201
+ },
202
+ {
203
+ "dataset": "un_multi",
204
+ "type": "task",
205
+ "origin_acc": 0.35,
206
+ "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}.",
207
+ "atk_acc": 0.34,
208
+ "atk_prompt": "Perform a machine translation for the provided sentence, changing it from {} to Kw6nrs57gH {}. ",
209
+ "transfer_ori_acc": 0.30870734220870066,
210
+ "transfer_atk_acc": 0.3125050772206132
211
+ },
212
+ {
213
+ "dataset": "un_multi",
214
+ "type": "role",
215
+ "origin_acc": 0.35,
216
+ "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}.",
217
+ "atk_acc": 0.31,
218
+ "atk_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language 5XeflW1ZJc {}. ",
219
+ "transfer_ori_acc": 0.31127691224033316,
220
+ "transfer_atk_acc": 0.31337641480593154
221
+ },
222
+ {
223
+ "dataset": "math",
224
+ "type": "task",
225
+ "origin_acc": 23.12,
226
+ "origin_prompt": "Calculate the answer to the following math query about {}.",
227
+ "atk_acc": 19.38,
228
+ "atk_prompt": "Calculate the answer to the following math query about 5XeflW1ZJc {}. ",
229
+ "transfer_ori_acc": 16.25,
230
+ "transfer_atk_acc": 17.5
231
+ },
232
+ {
233
+ "dataset": "math",
234
+ "type": "role",
235
+ "origin_acc": 22.5,
236
+ "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}.",
237
+ "atk_acc": 21.25,
238
+ "atk_prompt": "As a mathematics instructor, calculate the answer to the following problem related to AMsRIKZniY {}. ",
239
+ "transfer_ori_acc": 17.5,
240
+ "transfer_atk_acc": 17.5
241
+ }
242
+ ]
results_transfer/chatgpt_google-flan-t5-large/deepwordbug_0_shot.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 79.0,
6
+ "origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable':",
7
+ "atk_acc": 74.5,
8
+ "atk_prompt": "Review the sentece below and identify whether its grammar is 'Acceptable' or 'Unacceptable':",
9
+ "transfer_ori_acc": 76.5,
10
+ "transfer_atk_acc": 73.4
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 79.5,
16
+ "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:",
17
+ "atk_acc": 0.0,
18
+ "atk_prompt": "nhile performing grammr analysis, classify the rgammar of the fllowing sentenZe as 'acceptable' for corFect Xrammar or 'unacceptable' for incorrect gramUar:",
19
+ "transfer_ori_acc": 75.5,
20
+ "transfer_atk_acc": 72.5
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 74.0,
26
+ "origin_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 70.0,
28
+ "atk_prompt": "Evaluate whether the two provRided sentnces convey the same meaning by answering 'equivalent' or 'not_equivalent'. ",
29
+ "transfer_ori_acc": 81.86274509803921,
30
+ "transfer_atk_acc": 78.43137254901961
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 76.0,
36
+ "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 53.0,
38
+ "atk_prompt": "nI your capacity as a sematnic asseJssment tol, evaluate the prvoided senRtences and classify their meainngs as 'equivalent' or 'not_equivalent'. ",
39
+ "transfer_ori_acc": 76.47058823529412,
40
+ "transfer_atk_acc": 78.67647058823529
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 78.0,
46
+ "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 50.0,
48
+ "atk_prompt": "Exaluate whetxher the iven contet Supports the anwser to the queston by respEonding with 'entailment' or 'not_entailment'. ",
49
+ "transfer_ori_acc": 90.4,
50
+ "transfer_atk_acc": 50.1
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 79.5,
56
+ "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 19.5,
58
+ "atk_prompt": "As a textula inference expert, aInalyze if the answer to the question can be deducud from the provided context and Melect 'entailment' or 'not_entailment'. ",
59
+ "transfer_ori_acc": 78.10000000000001,
60
+ "transfer_atk_acc": 81.6
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 82.0,
66
+ "origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 45.0,
68
+ "atk_prompt": "Assess whethdr the followng sLtatements are identocal in meuaning by answerinJ 'equivalent' or 'not_equivalent'. ",
69
+ "transfer_ori_acc": 87.6,
70
+ "transfer_atk_acc": 85.0
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 82.5,
76
+ "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.",
77
+ "atk_acc": 79.5,
78
+ "atk_prompt": "Ao an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions. ",
79
+ "transfer_ori_acc": 87.3,
80
+ "transfer_atk_acc": 86.9
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 83.5,
86
+ "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.",
87
+ "atk_acc": 79.0,
88
+ "atk_prompt": "ArYe the folowing two sentenes entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word. ",
89
+ "transfer_ori_acc": 88.08664259927798,
90
+ "transfer_atk_acc": 83.39350180505414
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 80.5,
96
+ "origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':",
97
+ "atk_acc": 78.5,
98
+ "atk_prompt": "IAs an instrument for entailment evaluatioRn, consider the otwo sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':",
99
+ "transfer_ori_acc": 85.5595667870036,
100
+ "transfer_atk_acc": 85.92057761732852
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 93.0,
106
+ "origin_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative':",
107
+ "atk_acc": 88.5,
108
+ "atk_prompt": "Deetrmine the overall sentLment of this sentence, categorizing it as 'positive' or 'negative': ",
109
+ "transfer_ori_acc": 94.26605504587155,
110
+ "transfer_atk_acc": 94.72477064220183
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 93.0,
116
+ "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:",
117
+ "atk_acc": 85.0,
118
+ "atk_prompt": "uFnctioning as a sentmient identification tool, assess if the folGowing expression is 'positive' or 'negative'. Classify: ",
119
+ "transfer_ori_acc": 94.83944954128441,
120
+ "transfer_atk_acc": 93.34862385321101
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 66.2,
126
+ "origin_prompt": "Examine the pair of sentences and determine if they exhibit entailment or not_entailment. Answer with either 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 53.52,
128
+ "atk_prompt": "Examine the Bair of snetences and determine if they exhibit entailment or not_entailment. Ancwer with eMther 'entailment' or 'not_entailment'.",
129
+ "transfer_ori_acc": 61.97183098591549,
130
+ "transfer_atk_acc": 63.38028169014085
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 63.38,
136
+ "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':",
137
+ "atk_acc": 53.52,
138
+ "atk_prompt": "IQn the role of a semantic relationship analyzer, eamine the connection between the given sentences and decide if they exhbit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':",
139
+ "transfer_ori_acc": 54.929577464788736,
140
+ "transfer_atk_acc": 54.929577464788736
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 62.5,
146
+ "origin_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 60.0,
148
+ "atk_prompt": "Regarding the following multiple-choice question on {}, pick the correct answeWr from the options 'A', 'B', 'C', or 'D'. ",
149
+ "transfer_ori_acc": 44.32624113475177,
150
+ "transfer_atk_acc": 44.680851063829785
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 62.5,
156
+ "origin_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 59.0,
158
+ "atk_prompt": "With your knowledge of {}, tackle the follQwing multiple-choice question by choosing 'A', 'B', 'C', or 'D'.",
159
+ "transfer_ori_acc": 43.97163120567376,
160
+ "transfer_atk_acc": 43.08510638297872
161
+ },
162
+ {
163
+ "dataset": "squad_v2",
164
+ "type": "task",
165
+ "origin_acc": 71.87,
166
+ "origin_prompt": "Discover the best answer based on the context. If the context doesn't include an answer, respond with 'unanswerable'.",
167
+ "atk_acc": 56.61,
168
+ "atk_prompt": "DiscoveMr the best answr bassd on the Rontext. Iof the cnotext doesn't include an answer, resCpond with 'unanswerable'.",
169
+ "transfer_ori_acc": 87.54992770387507,
170
+ "transfer_atk_acc": 87.08932164326902
171
+ },
172
+ {
173
+ "dataset": "squad_v2",
174
+ "type": "role",
175
+ "origin_acc": 64.5,
176
+ "origin_prompt": "As an expert with a deep understanding of the context, find the best answer. If the context doesn't include an answer, say 'unanswerable'.",
177
+ "atk_acc": 36.52,
178
+ "atk_prompt": "zs an exprt with a deep understanding of the context, find the best answr. If the context doesn't include an anCswer, asy 'unanswerable'.",
179
+ "transfer_ori_acc": 88.04992770387507,
180
+ "transfer_atk_acc": 87.08932164326902
181
+ },
182
+ {
183
+ "dataset": "iwslt",
184
+ "type": "task",
185
+ "origin_acc": 0.22,
186
+ "origin_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context:",
187
+ "atk_acc": 0.21,
188
+ "atk_prompt": "Translate the provideMd sentence from {} to {} while mainOaining the original meaning and context:",
189
+ "transfer_ori_acc": 0.12847010019593297,
190
+ "transfer_atk_acc": 0.13470031324495513
191
+ },
192
+ {
193
+ "dataset": "iwslt",
194
+ "type": "role",
195
+ "origin_acc": 0.22,
196
+ "origin_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to {}:",
197
+ "atk_acc": 0.21,
198
+ "atk_prompt": "In the capacity of a proficient tanslator, plnase change the following senlence from {} to {}: ",
199
+ "transfer_ori_acc": 0.13904950536673938,
200
+ "transfer_atk_acc": 0.1211819424209054
201
+ },
202
+ {
203
+ "dataset": "un_multi",
204
+ "type": "task",
205
+ "origin_acc": 0.34,
206
+ "origin_prompt": "Translate the following text from the source language {} to the target language {}:",
207
+ "atk_acc": 0.33,
208
+ "atk_prompt": "Translate the following text from the source language {} to the Ptarget langugae {}:",
209
+ "transfer_ori_acc": 0.2655722295717415,
210
+ "transfer_atk_acc": 0.24955021119352877
211
+ },
212
+ {
213
+ "dataset": "un_multi",
214
+ "type": "role",
215
+ "origin_acc": 0.34,
216
+ "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}:",
217
+ "atk_acc": 0.32,
218
+ "atk_prompt": "In the role of a language expert, pErform a macRhine yranslation for the provided sentence, cZanging it from {} to {}: ",
219
+ "transfer_ori_acc": 0.26923226069781947,
220
+ "transfer_atk_acc": 0.2529020268653232
221
+ },
222
+ {
223
+ "dataset": "math",
224
+ "type": "task",
225
+ "origin_acc": 6.88,
226
+ "origin_prompt": "Resolve the following mathematical question associated with {}:",
227
+ "atk_acc": 5.62,
228
+ "atk_prompt": "Resolve the following mathematical quXestion assocated with {}:",
229
+ "transfer_ori_acc": 9.375,
230
+ "transfer_atk_acc": 11.875
231
+ },
232
+ {
233
+ "dataset": "math",
234
+ "type": "role",
235
+ "origin_acc": 5.0,
236
+ "origin_prompt": "Embodying a mathematics advisor, find the answer to the following math problem with {}:",
237
+ "atk_acc": 2.5,
238
+ "atk_prompt": "Embodying a mathematics advisor, ind the anwer to the fpollowing math problem with {}: ",
239
+ "transfer_ori_acc": 11.25,
240
+ "transfer_atk_acc": 12.5
241
+ }
242
+ ]
results_transfer/chatgpt_google-flan-t5-large/deepwordbug_3_shot.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 80.5,
6
+ "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable'.",
7
+ "atk_acc": 78.5,
8
+ "atk_prompt": "Examine the given sentence and decide if it is gramatically sound. nAswer with either 'Acceptable' or 'Unacceptable'. ",
9
+ "transfer_ori_acc": 77.7,
10
+ "transfer_atk_acc": 77.8
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 81.5,
16
+ "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable'.",
17
+ "atk_acc": 78.5,
18
+ "atk_prompt": "Funtioning as a graMmar evaluation tool, analyzY the given sentence and decide if it is grammatically corLect, responding with 'acceptable' or 'unacceptable'. ",
19
+ "transfer_ori_acc": 75.2,
20
+ "transfer_atk_acc": 75.8
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 74.5,
26
+ "origin_prompt": "Examine the pair of sentences and decide if their meanings are identical by answering with 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 65.5,
28
+ "atk_prompt": "Examine the Kair of sentences and edcide if their meanijngs are ibentical by answering with 'equivalent' or 'not_equivalent'. ",
29
+ "transfer_ori_acc": 82.84313725490196,
30
+ "transfer_atk_acc": 79.90196078431373
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 72.0,
36
+ "origin_prompt": "As a linguistic comparator, review the following pair of sentences and determine their semantic equivalence by choosing 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 66.5,
38
+ "atk_prompt": "AV a linguistic comaprator, review the following pair of sentences and determine their semayntic equivalence by chhoosing 'equivalent' or 'not_equivalent'. ",
39
+ "transfer_ori_acc": 79.90196078431373,
40
+ "transfer_atk_acc": 79.16666666666666
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 85.0,
46
+ "origin_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 77.5,
48
+ "atk_prompt": "Examine the contedt and question, and determine if the cXontext logically implies the answer by selecting 'entailment' or 'not_entailment'. ",
49
+ "transfer_ori_acc": 86.5,
50
+ "transfer_atk_acc": 87.7
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 86.0,
56
+ "origin_prompt": "As a textual analyst, examine if the given context logically implies the answer to the question and indicate your decision with 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 75.0,
58
+ "atk_prompt": "A a textual analyst, xamine if the gPven conSext logiclaly implies the asnwer to the qbuestion and indicate your decision with 'entailment' or 'not_entailment'. ",
59
+ "transfer_ori_acc": 88.8,
60
+ "transfer_atk_acc": 80.10000000000001
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 82.0,
66
+ "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 80.0,
68
+ "atk_prompt": "Can these two statements be considJred equal in meaning? Answer with 'equivalent' or 'not_equivalent'. ",
69
+ "transfer_ori_acc": 87.6,
70
+ "transfer_atk_acc": 87.9
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 83.0,
76
+ "origin_prompt": "Acting as a question equivalence instrument, determine if the provided questions are equivalent in meaning, answering with 'equivalent' for similar questions or 'not_equivalent' for dissimilar ones.",
77
+ "atk_acc": 81.5,
78
+ "atk_prompt": "Actinm as a question equivalence instrumetn, determine if the provided questions are equivalent in meaning, answering with 'equivalent' for similar questions or 'not_equivalent' for dissimilar ones. ",
79
+ "transfer_ori_acc": 86.5,
80
+ "transfer_atk_acc": 85.6
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 85.0,
86
+ "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.",
87
+ "atk_acc": 80.5,
88
+ "atk_prompt": "Does the relationshBp between the given sentences repersent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'. ",
89
+ "transfer_ori_acc": 83.75451263537906,
90
+ "transfer_atk_acc": 82.67148014440433
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 84.5,
96
+ "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.",
97
+ "atk_acc": 81.0,
98
+ "atk_prompt": "In your role as an entailment analysis toIol, aZssess the relationship between the given sentences and cassify it as 'entailment' or 'not_entailment'. ",
99
+ "transfer_ori_acc": 83.03249097472924,
100
+ "transfer_atk_acc": 82.31046931407943
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 97.0,
106
+ "origin_prompt": "Evaluate the sentiment of the given text and classify it as 'positive' or 'negative'.",
107
+ "atk_acc": 94.0,
108
+ "atk_prompt": "ENvaluate the senttment of the giOven text and classify it as 'positive' or 'negative'. ",
109
+ "transfer_ori_acc": 94.61009174311926,
110
+ "transfer_atk_acc": 95.29816513761467
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 97.0,
116
+ "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'.",
117
+ "atk_acc": 94.0,
118
+ "atk_prompt": "qAs a setniment classifier, determnne whether the following text is 'positive' or 'negative'. ",
119
+ "transfer_ori_acc": 94.4954128440367,
120
+ "transfer_atk_acc": 95.29816513761467
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 67.61,
126
+ "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 64.79,
128
+ "atk_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ",
129
+ "transfer_ori_acc": 64.7887323943662,
130
+ "transfer_atk_acc": 38.028169014084504
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 64.79,
136
+ "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.",
137
+ "atk_acc": 59.15,
138
+ "atk_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ",
139
+ "transfer_ori_acc": 57.74647887323944,
140
+ "transfer_atk_acc": 46.478873239436616
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 66.5,
146
+ "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 63.5,
148
+ "atk_prompt": "ISn ralation to the mulitple-choice question on {}, pleare provide the accurate answer by choosing 'A', 'B', 'C', or 'D'. ",
149
+ "transfer_ori_acc": 46.808510638297875,
150
+ "transfer_atk_acc": 45.39007092198582
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 65.5,
156
+ "origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 62.5,
158
+ "atk_prompt": "As someone well-versed in {}, please address the mulZtiple-choice question below by selecting 'A', 'B', 'C', or 'D'.",
159
+ "transfer_ori_acc": 46.63120567375886,
160
+ "transfer_atk_acc": 45.744680851063826
161
+ },
162
+ {
163
+ "dataset": "squad_v2",
164
+ "type": "task",
165
+ "origin_acc": 73.25,
166
+ "origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, reply with 'unanswerable'.",
167
+ "atk_acc": 67.18,
168
+ "atk_prompt": "Based on the goiven context, provide the best possible answer. IL there's no answer available in the coYtext, reply with 'unanswerable'.",
169
+ "transfer_ori_acc": 87.04992770387507,
170
+ "transfer_atk_acc": 87.54992770387507
171
+ },
172
+ {
173
+ "dataset": "squad_v2",
174
+ "type": "role",
175
+ "origin_acc": 72.68,
176
+ "origin_prompt": "As a subject matter expert, extract the correct answer from the context. If an answer is not present, indicate 'unanswerable'.",
177
+ "atk_acc": 66.77,
178
+ "atk_prompt": "s a subject matter eAxpert, extract the correc answer from the context. If an answer is not presnt, idicate 'unanswerable'.",
179
+ "transfer_ori_acc": 87.04992770387507,
180
+ "transfer_atk_acc": 87.14992770387508
181
+ },
182
+ {
183
+ "dataset": "iwslt",
184
+ "type": "task",
185
+ "origin_acc": 0.23,
186
+ "origin_prompt": "Given the sentence below, perform a machine translation from {} to {}.",
187
+ "atk_acc": 0.23,
188
+ "atk_prompt": "Given the sentence below, perform a mRachine traCslation from {} to {}. ",
189
+ "transfer_ori_acc": 0.21292285800980326,
190
+ "transfer_atk_acc": 0.21552262845169096
191
+ },
192
+ {
193
+ "dataset": "iwslt",
194
+ "type": "role",
195
+ "origin_acc": 0.24,
196
+ "origin_prompt": "Assuming the responsibilities of a professional translator, translate the subsequent text passage from {} to {}.",
197
+ "atk_acc": 0.23,
198
+ "atk_prompt": "Assuming the responsibilities of a professional transaltor, translate the subsequent text passage from {} to {}. ",
199
+ "transfer_ori_acc": 0.22030199520456395,
200
+ "transfer_atk_acc": 0.21973314066426236
201
+ },
202
+ {
203
+ "dataset": "un_multi",
204
+ "type": "task",
205
+ "origin_acc": 0.35,
206
+ "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}.",
207
+ "atk_acc": 0.26,
208
+ "atk_prompt": "Plese erform a translation of the Fiven sentence, converwing it from {} to {}. ",
209
+ "transfer_ori_acc": 0.3076276888167265,
210
+ "transfer_atk_acc": 0.3054838402284908
211
+ },
212
+ {
213
+ "dataset": "un_multi",
214
+ "type": "role",
215
+ "origin_acc": 0.35,
216
+ "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}.",
217
+ "atk_acc": 0.35,
218
+ "atk_prompt": "As a qualified translator, convert the given sentence from its original {} language to the targte language {}. ",
219
+ "transfer_ori_acc": 0.31127691224033316,
220
+ "transfer_atk_acc": 0.3078172023513576
221
+ },
222
+ {
223
+ "dataset": "math",
224
+ "type": "task",
225
+ "origin_acc": 23.12,
226
+ "origin_prompt": "Calculate the answer to the following math query about {}.",
227
+ "atk_acc": 18.75,
228
+ "atk_prompt": "Calculate the ansewr to the followiJg math query about {}. ",
229
+ "transfer_ori_acc": 16.25,
230
+ "transfer_atk_acc": 16.25
231
+ },
232
+ {
233
+ "dataset": "math",
234
+ "type": "role",
235
+ "origin_acc": 23.12,
236
+ "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}.",
237
+ "atk_acc": 20.62,
238
+ "atk_prompt": "As a mathematics insrtuctor, calculate the answer to the following problem Vrelated to {}. ",
239
+ "transfer_ori_acc": 17.5,
240
+ "transfer_atk_acc": 16.25
241
+ }
242
+ ]
results_transfer/chatgpt_google-flan-t5-large/stresstest_0_shot.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 78.5,
6
+ "origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable':",
7
+ "atk_acc": 32.5,
8
+ "atk_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable' and true is true :",
9
+ "transfer_ori_acc": 76.5,
10
+ "transfer_atk_acc": 68.5
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 79.5,
16
+ "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:",
17
+ "atk_acc": 75.0,
18
+ "atk_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar and true is true and true is true and true is true and true is true and true is true :",
19
+ "transfer_ori_acc": 75.5,
20
+ "transfer_atk_acc": 66.3
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 77.5,
26
+ "origin_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 73.0,
28
+ "atk_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent' and false is not true . ",
29
+ "transfer_ori_acc": 80.63725490196079,
30
+ "transfer_atk_acc": 76.9607843137255
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 76.0,
36
+ "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 15.5,
38
+ "atk_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent' and true is true . ",
39
+ "transfer_ori_acc": 76.47058823529412,
40
+ "transfer_atk_acc": 79.16666666666666
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 78.5,
46
+ "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 74.5,
48
+ "atk_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment' and true is true . ",
49
+ "transfer_ori_acc": 90.4,
50
+ "transfer_atk_acc": 88.8
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 79.5,
56
+ "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 69.0,
58
+ "atk_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ",
59
+ "transfer_ori_acc": 78.10000000000001,
60
+ "transfer_atk_acc": 63.1
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 80.0,
66
+ "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 72.5,
68
+ "atk_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent' and false is not true . ",
69
+ "transfer_ori_acc": 87.8,
70
+ "transfer_atk_acc": 83.2
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 81.0,
76
+ "origin_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.",
77
+ "atk_acc": 80.5,
78
+ "atk_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ",
79
+ "transfer_ori_acc": 87.3,
80
+ "transfer_atk_acc": 80.10000000000001
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 83.5,
86
+ "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.",
87
+ "atk_acc": 81.5,
88
+ "atk_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word and true is true . ",
89
+ "transfer_ori_acc": 88.08664259927798,
90
+ "transfer_atk_acc": 86.64259927797833
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 81.5,
96
+ "origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':",
97
+ "atk_acc": 80.5,
98
+ "atk_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true :",
99
+ "transfer_ori_acc": 85.5595667870036,
100
+ "transfer_atk_acc": 86.28158844765343
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 91.0,
106
+ "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment:",
107
+ "atk_acc": 0.0,
108
+ "atk_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment and true is true : ",
109
+ "transfer_ori_acc": 95.06880733944955,
110
+ "transfer_atk_acc": 94.26605504587155
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 93.5,
116
+ "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:",
117
+ "atk_acc": 93.0,
118
+ "atk_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify and false is not true : ",
119
+ "transfer_ori_acc": 94.83944954128441,
120
+ "transfer_atk_acc": 93.80733944954129
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 61.97,
126
+ "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.",
127
+ "atk_acc": 63.38,
128
+ "atk_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word and true is true . ",
129
+ "transfer_ori_acc": 66.19718309859155,
130
+ "transfer_atk_acc": 66.19718309859155
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 63.38,
136
+ "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':",
137
+ "atk_acc": 59.15,
138
+ "atk_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true :",
139
+ "transfer_ori_acc": 54.929577464788736,
140
+ "transfer_atk_acc": 61.97183098591549
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 63.0,
146
+ "origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 53.5,
148
+ "atk_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D' and true is true . ",
149
+ "transfer_ori_acc": 45.744680851063826,
150
+ "transfer_atk_acc": 45.0354609929078
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 61.5,
156
+ "origin_prompt": "As a skilled person in the field of {}, please respond to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 57.0,
158
+ "atk_prompt": "As a skilled person in the field of {}, please respond to the multiple-choice question by choosing 'A', 'B', 'C', or 'D' and false is not true .",
159
+ "transfer_ori_acc": 44.680851063829785,
160
+ "transfer_atk_acc": 45.21276595744681
161
+ },
162
+ {
163
+ "dataset": "squad_v2",
164
+ "type": "task",
165
+ "origin_acc": 69.3,
166
+ "origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, respond with 'unanswerable'.",
167
+ "atk_acc": 69.88,
168
+ "atk_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, respond with 'unanswerable' and true is true .",
169
+ "transfer_ori_acc": 87.04992770387507,
170
+ "transfer_atk_acc": 87.14992770387508
171
+ },
172
+ {
173
+ "dataset": "squad_v2",
174
+ "type": "role",
175
+ "origin_acc": 64.4,
176
+ "origin_prompt": "As an expert with a deep understanding of the context, find the best answer. If the context doesn't include an answer, say 'unanswerable'.",
177
+ "atk_acc": 64.49,
178
+ "atk_prompt": "As an expert with a deep understanding of the context, find the best answer. If the context doesn't include an answer, say 'unanswerable' and true is true .",
179
+ "transfer_ori_acc": 88.04992770387507,
180
+ "transfer_atk_acc": 87.64992770387508
181
+ },
182
+ {
183
+ "dataset": "iwslt",
184
+ "type": "task",
185
+ "origin_acc": 0.22,
186
+ "origin_prompt": "Using machine translation, convert the given sentence from {} into the {} language:",
187
+ "atk_acc": 0.19,
188
+ "atk_prompt": "Using machine translation, convert the given sentence from {} into the {} language and false is not true :",
189
+ "transfer_ori_acc": 0.13366423747896072,
190
+ "transfer_atk_acc": 0.1282464400428448
191
+ },
192
+ {
193
+ "dataset": "iwslt",
194
+ "type": "role",
195
+ "origin_acc": 0.22,
196
+ "origin_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to {}:",
197
+ "atk_acc": 0.21,
198
+ "atk_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to and true is true and true is true and true is true and true is true and true is true {}: ",
199
+ "transfer_ori_acc": 0.13904950536673938,
200
+ "transfer_atk_acc": 0.04013983595471011
201
+ },
202
+ {
203
+ "dataset": "un_multi",
204
+ "type": "task",
205
+ "origin_acc": 0.34,
206
+ "origin_prompt": "Translate the subsequent text passage from its original {} language to the {} language:",
207
+ "atk_acc": 0.29,
208
+ "atk_prompt": "Translate the subsequent text passage from its original {} language to the {} language and false is not true :",
209
+ "transfer_ori_acc": 0.2679607664026083,
210
+ "transfer_atk_acc": 0.24062998420275192
211
+ },
212
+ {
213
+ "dataset": "un_multi",
214
+ "type": "role",
215
+ "origin_acc": 0.34,
216
+ "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}:",
217
+ "atk_acc": 0.31,
218
+ "atk_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to and false is not true {}: ",
219
+ "transfer_ori_acc": 0.26923226069781947,
220
+ "transfer_atk_acc": 0.20805924250934474
221
+ },
222
+ {
223
+ "dataset": "math",
224
+ "type": "task",
225
+ "origin_acc": 6.25,
226
+ "origin_prompt": "Resolve the following mathematical question associated with {}:",
227
+ "atk_acc": 5.62,
228
+ "atk_prompt": "Resolve the following mathematical question associated with and true is true {}:",
229
+ "transfer_ori_acc": 9.375,
230
+ "transfer_atk_acc": 13.125
231
+ },
232
+ {
233
+ "dataset": "math",
234
+ "type": "role",
235
+ "origin_acc": 5.0,
236
+ "origin_prompt": "Embodying a mathematics advisor, find the answer to the following math problem with {}:",
237
+ "atk_acc": 3.12,
238
+ "atk_prompt": "Embodying a mathematics advisor, find the answer to the following math problem with and true is true and true is true and true is true and true is true and true is true {}: ",
239
+ "transfer_ori_acc": 11.25,
240
+ "transfer_atk_acc": 13.750000000000002
241
+ }
242
+ ]
results_transfer/chatgpt_google-flan-t5-large/stresstest_3_shot.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 80.5,
6
+ "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable'.",
7
+ "atk_acc": 79.5,
8
+ "atk_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable' and true is true and true is true and true is true and true is true and true is true . ",
9
+ "transfer_ori_acc": 77.7,
10
+ "transfer_atk_acc": 77.60000000000001
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 81.5,
16
+ "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable'.",
17
+ "atk_acc": 80.5,
18
+ "atk_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable' and false is not true . ",
19
+ "transfer_ori_acc": 75.2,
20
+ "transfer_atk_acc": 75.6
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 73.5,
26
+ "origin_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 71.5,
28
+ "atk_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ",
29
+ "transfer_ori_acc": 82.84313725490196,
30
+ "transfer_atk_acc": 25.735294117647058
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 72.5,
36
+ "origin_prompt": "In the role of a textual comparison specialist, examine the given sentences and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 70.0,
38
+ "atk_prompt": "In the role of a textual comparison specialist, examine the given sentences and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent' and true is true . ",
39
+ "transfer_ori_acc": 79.65686274509804,
40
+ "transfer_atk_acc": 82.35294117647058
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 85.0,
46
+ "origin_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 79.5,
48
+ "atk_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ",
49
+ "transfer_ori_acc": 86.5,
50
+ "transfer_atk_acc": 81.5
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 84.0,
56
+ "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 81.5,
58
+ "atk_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment' and false is not true . ",
59
+ "transfer_ori_acc": 87.3,
60
+ "transfer_atk_acc": 85.8
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 81.5,
66
+ "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 81.0,
68
+ "atk_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent' and false is not true . ",
69
+ "transfer_ori_acc": 87.6,
70
+ "transfer_atk_acc": 85.7
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 83.0,
76
+ "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.",
77
+ "atk_acc": 82.0,
78
+ "atk_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions and true is true and true is true and true is true and true is true and true is true . ",
79
+ "transfer_ori_acc": 86.4,
80
+ "transfer_atk_acc": 86.8
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 85.0,
86
+ "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.",
87
+ "atk_acc": 82.0,
88
+ "atk_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment' and false is not true . ",
89
+ "transfer_ori_acc": 83.75451263537906,
90
+ "transfer_atk_acc": 85.1985559566787
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 83.0,
96
+ "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment'.",
97
+ "atk_acc": 83.0,
98
+ "atk_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment' and false is not true . ",
99
+ "transfer_ori_acc": 82.67148014440433,
100
+ "transfer_atk_acc": 80.86642599277978
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 97.0,
106
+ "origin_prompt": "Evaluate the sentiment of the given text and classify it as 'positive' or 'negative'.",
107
+ "atk_acc": 95.5,
108
+ "atk_prompt": "Evaluate the sentiment of the given text and classify it as 'positive' or 'negative' and true is true . ",
109
+ "transfer_ori_acc": 94.61009174311926,
110
+ "transfer_atk_acc": 94.95412844036697
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 97.0,
116
+ "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'.",
117
+ "atk_acc": 96.0,
118
+ "atk_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative' and false is not true . ",
119
+ "transfer_ori_acc": 94.4954128440367,
120
+ "transfer_atk_acc": 94.95412844036697
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 67.61,
126
+ "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 60.56,
128
+ "atk_prompt": "Indicate if the connection between the later sentences is 'entailment' or 'not_entailment'. ",
129
+ "transfer_ori_acc": 64.7887323943662,
130
+ "transfer_atk_acc": 64.7887323943662
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 66.2,
136
+ "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.",
137
+ "atk_acc": 59.15,
138
+ "atk_prompt": "In the role of a semantic relationship analyzer, investigated the linking between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'. ",
139
+ "transfer_ori_acc": 56.33802816901409,
140
+ "transfer_atk_acc": 56.33802816901409
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 66.5,
146
+ "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 61.5,
148
+ "atk_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D' and false is not true . ",
149
+ "transfer_ori_acc": 45.21276595744681,
150
+ "transfer_atk_acc": 44.858156028368796
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 65.5,
156
+ "origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 59.5,
158
+ "atk_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D' and false is not true .",
159
+ "transfer_ori_acc": 46.63120567375886,
160
+ "transfer_atk_acc": 44.858156028368796
161
+ },
162
+ {
163
+ "dataset": "squad_v2",
164
+ "type": "task",
165
+ "origin_acc": 73.99,
166
+ "origin_prompt": "Please derive the most fitting answer from the context. If there isn't an answer in the context, use 'unanswerable'.",
167
+ "atk_acc": 61.58,
168
+ "atk_prompt": "Please derive the most fitting answer from the context. If there isn't an answer in the context, use 'unanswerable' and false is not true .",
169
+ "transfer_ori_acc": 87.04992770387507,
170
+ "transfer_atk_acc": 87.14992770387508
171
+ },
172
+ {
173
+ "dataset": "squad_v2",
174
+ "type": "role",
175
+ "origin_acc": 72.94,
176
+ "origin_prompt": "Being well-versed in the context, please derive the most fitting answer. If there isn't an answer in the context, use 'unanswerable'.",
177
+ "atk_acc": 62.85,
178
+ "atk_prompt": "Being well-versed in the context, please derive the most fitting answer. If there isn't an answer in the context, use 'unanswerable' and false is not true .",
179
+ "transfer_ori_acc": 87.04992770387507,
180
+ "transfer_atk_acc": 87.14992770387508
181
+ },
182
+ {
183
+ "dataset": "iwslt",
184
+ "type": "task",
185
+ "origin_acc": 0.24,
186
+ "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}.",
187
+ "atk_acc": 0.23,
188
+ "atk_prompt": "Please perform a translation of the given sentence, converting it from {} to and true is true and true is true and true is true and true is true and true is true {}. ",
189
+ "transfer_ori_acc": 0.22004054628058534,
190
+ "transfer_atk_acc": 0.21794699013962093
191
+ },
192
+ {
193
+ "dataset": "iwslt",
194
+ "type": "role",
195
+ "origin_acc": 0.24,
196
+ "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}.",
197
+ "atk_acc": 0.23,
198
+ "atk_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to and false is not true {}. ",
199
+ "transfer_ori_acc": 0.22070346732323043,
200
+ "transfer_atk_acc": 0.21483292774582805
201
+ },
202
+ {
203
+ "dataset": "un_multi",
204
+ "type": "task",
205
+ "origin_acc": 0.35,
206
+ "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}.",
207
+ "atk_acc": 0.34,
208
+ "atk_prompt": "Perform a machine translation for the provided sentence, changing it from {} to and false is not true {}. ",
209
+ "transfer_ori_acc": 0.30870734220870066,
210
+ "transfer_atk_acc": 0.3143076066591428
211
+ },
212
+ {
213
+ "dataset": "un_multi",
214
+ "type": "role",
215
+ "origin_acc": 0.35,
216
+ "origin_prompt": "Acting as a professional translator, translate the provided sentence from {} to {}.",
217
+ "atk_acc": 0.35,
218
+ "atk_prompt": "Acting as a professional translator, translate the provided sentence from {} to and true is true {}. ",
219
+ "transfer_ori_acc": 0.30726569506388013,
220
+ "transfer_atk_acc": 0.30886093176248847
221
+ },
222
+ {
223
+ "dataset": "math",
224
+ "type": "task",
225
+ "origin_acc": 23.12,
226
+ "origin_prompt": "Resolve the following mathematical question associated with {}.",
227
+ "atk_acc": 20.0,
228
+ "atk_prompt": "Resolve the following mathematical question associated with and true is true {}. ",
229
+ "transfer_ori_acc": 17.5,
230
+ "transfer_atk_acc": 16.875
231
+ },
232
+ {
233
+ "dataset": "math",
234
+ "type": "role",
235
+ "origin_acc": 22.5,
236
+ "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}.",
237
+ "atk_acc": 20.62,
238
+ "atk_prompt": "As a mathematics instructor, calculate the answer to the following problem related to and false is not true {}. ",
239
+ "transfer_ori_acc": 17.5,
240
+ "transfer_atk_acc": 16.25
241
+ }
242
+ ]
results_transfer/chatgpt_google-flan-t5-large/textbugger_0_shot.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 77.5,
6
+ "origin_prompt": "Assess the grammatical structure of the given sentence and classify it as 'Acceptable' or 'Unacceptable':",
7
+ "atk_acc": 6.5,
8
+ "atk_prompt": "As sess the garmmatical structure of the conferred sentence and classify it as 'Acceptable' or 'Unacceptable':",
9
+ "transfer_ori_acc": 75.5,
10
+ "transfer_atk_acc": 74.5
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 79.5,
16
+ "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:",
17
+ "atk_acc": 9.5,
18
+ "atk_prompt": "While prforming grammar analyzing, classify the grammar of the follows sentеnce as 'acceptable' for correcting gammar or 'unacceptable' for incorrect ɡrammar:",
19
+ "transfer_ori_acc": 75.5,
20
+ "transfer_atk_acc": 72.5
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 75.0,
26
+ "origin_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 67.5,
28
+ "atk_prompt": "Evaluate wհether the both provided sentences convey the same meaninɡ by answeriոg 'equivalent' or 'not_equivalent'. ",
29
+ "transfer_ori_acc": 81.86274509803921,
30
+ "transfer_atk_acc": 83.33333333333334
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 75.0,
36
+ "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 9.0,
38
+ "atk_prompt": "In your capacities as a semɑntic assessment tool, evaluate the provdied penalties and classify their meaning as 'equivalent' or 'not_equivalent'. ",
39
+ "transfer_ori_acc": 76.47058823529412,
40
+ "transfer_atk_acc": 78.67647058823529
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 77.0,
46
+ "origin_prompt": "Consider the context and question, and indicate if the answer can be logically deduced from the context by responding with 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 57.0,
48
+ "atk_prompt": "Considеr the context and question, and idicate if the respond can be logically ԁeduced from the context by ⲅesponding with 'entailment' or 'not_entailment'. ",
49
+ "transfer_ori_acc": 83.39999999999999,
50
+ "transfer_atk_acc": 79.7
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 78.5,
56
+ "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 20.0,
58
+ "atk_prompt": "Because a textual inference expert, analysing if the answeⲅ to the question can be deduced from the providеd contex𝚝 and choosing 'entailment' or 'not_entailment'. ",
59
+ "transfer_ori_acc": 78.10000000000001,
60
+ "transfer_atk_acc": 71.5
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 81.5,
66
+ "origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 44.5,
68
+ "atk_prompt": "Assess whether the following statements are analogous in maning by answeⲅing 'equivalent' or 'not_equivalent'. ",
69
+ "transfer_ori_acc": 87.6,
70
+ "transfer_atk_acc": 87.5
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 83.0,
76
+ "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.",
77
+ "atk_acc": 77.5,
78
+ "atk_prompt": "As an instrument for problem comparison evɑluation, consider the ques𝚝ions and determine if their meaning is the same, respond with 'equivalent' for similar questiоns or 'not_equivalent' for different questions. ",
79
+ "transfer_ori_acc": 87.3,
80
+ "transfer_atk_acc": 87.1
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 83.0,
86
+ "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.",
87
+ "atk_acc": 78.5,
88
+ "atk_prompt": "Be the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word. ",
89
+ "transfer_ori_acc": 88.08664259927798,
90
+ "transfer_atk_acc": 83.39350180505414
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 81.0,
96
+ "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':",
97
+ "atk_acc": 74.5,
98
+ "atk_prompt": "Working as an entailment classifier, detecting if the given torque of sentences displays entailment or not_entailment. Answering with 'entailment' or 'not_entailment':",
99
+ "transfer_ori_acc": 83.03249097472924,
100
+ "transfer_atk_acc": 83.75451263537906
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 90.0,
106
+ "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment:",
107
+ "atk_acc": 82.5,
108
+ "atk_prompt": "Review this speeches and decide if it has a 'positive' or 'negative' sentiment: ",
109
+ "transfer_ori_acc": 95.06880733944955,
110
+ "transfer_atk_acc": 94.61009174311926
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 93.5,
116
+ "origin_prompt": "Acting as a sentiment evaluator, identify if the given sentence is 'positive' or 'negative'. Classify:",
117
+ "atk_acc": 86.0,
118
+ "atk_prompt": "Atcing as a sentiemnt evalua𝚝or, detecting if the given sentence is 'positive' or 'negative'. Classify: ",
119
+ "transfer_ori_acc": 95.06880733944955,
120
+ "transfer_atk_acc": 94.72477064220183
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 67.61,
126
+ "origin_prompt": "Examine the pair of sentences and determine if they exhibit entailment or not_entailment. Answer with either 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 52.11,
128
+ "atk_prompt": "Examine the paiⲅ of sentences and determines if they exhibit entailment or not_entailment. Answer with neither 'entailment' or 'not_entailment'.",
129
+ "transfer_ori_acc": 61.97183098591549,
130
+ "transfer_atk_acc": 64.7887323943662
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 60.56,
136
+ "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':",
137
+ "atk_acc": 52.11,
138
+ "atk_prompt": "Working as an entailment classifier, detects whether the given pair of sentences displays entailment or not_entailment. Answering with 'entailment' or 'not_entailment':",
139
+ "transfer_ori_acc": 57.74647887323944,
140
+ "transfer_atk_acc": 57.74647887323944
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 62.5,
146
+ "origin_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 51.0,
148
+ "atk_prompt": "Regarding the hereafter multiple-choice question on {}, piϲk the corrects answers from the options 'A', 'B', 'C', or 'D'. ",
149
+ "transfer_ori_acc": 44.32624113475177,
150
+ "transfer_atk_acc": 44.680851063829785
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 62.5,
156
+ "origin_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 57.5,
158
+ "atk_prompt": "Witհ your knowledge of {}, confront the following multipⅼe-choice question by select 'A', 'B', 'C', or 'D'.",
159
+ "transfer_ori_acc": 43.97163120567376,
160
+ "transfer_atk_acc": 44.858156028368796
161
+ },
162
+ {
163
+ "dataset": "squad_v2",
164
+ "type": "task",
165
+ "origin_acc": 69.27,
166
+ "origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, respond with 'unanswerable'.",
167
+ "atk_acc": 49.17,
168
+ "atk_prompt": "Based on the given ambient, provide the nicest achievable answer. If there's no answer avaiⅼable in the context, respond with 'unanswerable'.",
169
+ "transfer_ori_acc": 87.04992770387507,
170
+ "transfer_atk_acc": 87.64992770387508
171
+ },
172
+ {
173
+ "dataset": "squad_v2",
174
+ "type": "role",
175
+ "origin_acc": 64.85,
176
+ "origin_prompt": "As an expert with a deep understanding of the context, find the best answer. If the context doesn't include an answer, say 'unanswerable'.",
177
+ "atk_acc": 14.05,
178
+ "atk_prompt": "As an expert with a deepest understanding of the context, found the nicest respond. Though the context doesn't incⅼude an answers, telling 'unanswerable'.",
179
+ "transfer_ori_acc": 88.04992770387507,
180
+ "transfer_atk_acc": 87.58932164326902
181
+ },
182
+ {
183
+ "dataset": "iwslt",
184
+ "type": "task",
185
+ "origin_acc": 0.22,
186
+ "origin_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context:",
187
+ "atk_acc": 0.19,
188
+ "atk_prompt": "Translate the provied punishments from {} to {} while maintaining the original meaning and context:",
189
+ "transfer_ori_acc": 0.12847010019593297,
190
+ "transfer_atk_acc": 0.13516467912232863
191
+ },
192
+ {
193
+ "dataset": "iwslt",
194
+ "type": "role",
195
+ "origin_acc": 0.22,
196
+ "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}:",
197
+ "atk_acc": 0.03,
198
+ "atk_prompt": "Pick on the role of a certified translation, fulfilling a translation of the yielded sentence from {} to {}: ",
199
+ "transfer_ori_acc": 0.14266566803921382,
200
+ "transfer_atk_acc": 0.13671812855476675
201
+ },
202
+ {
203
+ "dataset": "un_multi",
204
+ "type": "task",
205
+ "origin_acc": 0.34,
206
+ "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}:",
207
+ "atk_acc": 0.32,
208
+ "atk_prompt": "Perform a typewriters translation for the provided sentence, altered it from {} to {}:",
209
+ "transfer_ori_acc": 0.26814405340158265,
210
+ "transfer_atk_acc": 0.27318496651757523
211
+ },
212
+ {
213
+ "dataset": "un_multi",
214
+ "type": "role",
215
+ "origin_acc": 0.34,
216
+ "origin_prompt": "While acting as an experienced language expert, translate the following text from {} to {}:",
217
+ "atk_acc": 0.32,
218
+ "atk_prompt": "Whilst acting as an experienced language expert, translate the following laws from {} to {}: ",
219
+ "transfer_ori_acc": 0.26489202618745367,
220
+ "transfer_atk_acc": 0.26471912913694645
221
+ },
222
+ {
223
+ "dataset": "math",
224
+ "type": "task",
225
+ "origin_acc": 6.25,
226
+ "origin_prompt": "Resolve the following mathematical question associated with {}:",
227
+ "atk_acc": 3.75,
228
+ "atk_prompt": "Solving the following mathematical question associɑted with {}:",
229
+ "transfer_ori_acc": 9.375,
230
+ "transfer_atk_acc": 14.374999999999998
231
+ },
232
+ {
233
+ "dataset": "math",
234
+ "type": "role",
235
+ "origin_acc": 5.0,
236
+ "origin_prompt": "In the position of a mathematics coach, figure out the answer to this math challenge involving {}:",
237
+ "atk_acc": 1.25,
238
+ "atk_prompt": "In the poste of a mathematical coach, figure out the replying to this math challenge involving {}: ",
239
+ "transfer_ori_acc": 13.750000000000002,
240
+ "transfer_atk_acc": 13.125
241
+ }
242
+ ]
results_transfer/chatgpt_google-flan-t5-large/textbugger_3_shot.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 80.5,
6
+ "origin_prompt": "Analyze the provided sentence and classify its grammatical correctness as 'Acceptable' or 'Unacceptable'.",
7
+ "atk_acc": 67.5,
8
+ "atk_prompt": "Analysing the provided sentences and classify its pronouns correct as 'Acceptable' or 'Unacceptable'. ",
9
+ "transfer_ori_acc": 77.10000000000001,
10
+ "transfer_atk_acc": 66.9
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 81.0,
16
+ "origin_prompt": "In the role of a syntax analyzer, examine the grammar of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones.",
17
+ "atk_acc": 77.5,
18
+ "atk_prompt": "In the role of a syntax checker, examine the grammɑr of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentence or 'unacceptable' for incorrect ones. ",
19
+ "transfer_ori_acc": 74.2,
20
+ "transfer_atk_acc": 74.1
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 73.0,
26
+ "origin_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 68.0,
28
+ "atk_prompt": "Ass ess if the two gien sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent'. ",
29
+ "transfer_ori_acc": 82.84313725490196,
30
+ "transfer_atk_acc": 81.61764705882352
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 71.5,
36
+ "origin_prompt": "As a linguistic comparator, review the following pair of sentences and determine their semantic equivalence by choosing 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 64.5,
38
+ "atk_prompt": "As a linguistic comparator, review the following torque of sentences and determine their semantiϲ eԛuivalence by choosing 'equivalent' or 'not_equivalent'. ",
39
+ "transfer_ori_acc": 79.90196078431373,
40
+ "transfer_atk_acc": 77.69607843137256
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 85.0,
46
+ "origin_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 76.0,
48
+ "atk_prompt": "Examine the cntext and question, and determine if the backdrop logically implies the answer by selecting 'entailment' or 'not_entailment'. ",
49
+ "transfer_ori_acc": 86.5,
50
+ "transfer_atk_acc": 88.4
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 83.0,
56
+ "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 73.5,
58
+ "atk_prompt": "Aѕ a semantic researcher, evaluate si the gave framework supports the respond to the question and choose 'entailment' or 'not_entailment'. ",
59
+ "transfer_ori_acc": 87.3,
60
+ "transfer_atk_acc": 80.7
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 82.5,
66
+ "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 80.0,
68
+ "atk_prompt": "Can these two st atements be considered equal in meaning? Ansԝer with 'equivalent' or 'not_equivalent'. ",
69
+ "transfer_ori_acc": 87.6,
70
+ "transfer_atk_acc": 85.9
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 82.5,
76
+ "origin_prompt": "In the capacity of a question assessment system, indicate if the meaning of the provided questions is the same, responding with 'equivalent' or 'not_equivalent'.",
77
+ "atk_acc": 80.5,
78
+ "atk_prompt": "In the abilities of a question ɑssessment system, indicate if the meaning of the provided matters is the same, responding with 'equivalent' or 'not_equivalent'. ",
79
+ "transfer_ori_acc": 86.1,
80
+ "transfer_atk_acc": 85.9
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 84.0,
86
+ "origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.",
87
+ "atk_acc": 76.5,
88
+ "atk_prompt": "Review the two declarations and categorize their relationship as neither 'entailment' or 'not_entailment'. ",
89
+ "transfer_ori_acc": 84.83754512635379,
90
+ "transfer_atk_acc": 84.47653429602889
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 83.5,
96
+ "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment'.",
97
+ "atk_acc": 80.0,
98
+ "atk_prompt": "As an entailment identification system, eamine the connection between the following sentences and reѕpond with 'entailment' or 'not_entailment'. ",
99
+ "transfer_ori_acc": 82.67148014440433,
100
+ "transfer_atk_acc": 83.39350180505414
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 95.5,
106
+ "origin_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative'.",
107
+ "atk_acc": 74.0,
108
+ "atk_prompt": "Ater examining the following expression, etiquette its emotion as nor 'positive' or 'negative'. ",
109
+ "transfer_ori_acc": 93.69266055045871,
110
+ "transfer_atk_acc": 94.61009174311926
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 97.5,
116
+ "origin_prompt": "In the capacity of a sentiment classifier, decide whether the given quote is 'positive' or 'negative'.",
117
+ "atk_acc": 93.5,
118
+ "atk_prompt": "Onto the capacity of a feeling classifeir, decide si the given quote is 'positive' or 'negative'. ",
119
+ "transfer_ori_acc": 94.95412844036697,
120
+ "transfer_atk_acc": 95.06880733944955
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 69.01,
126
+ "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 61.97,
128
+ "atk_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment' Vq3aBzuZcD. ",
129
+ "transfer_ori_acc": 64.7887323943662,
130
+ "transfer_atk_acc": 66.19718309859155
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 66.2,
136
+ "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.",
137
+ "atk_acc": 59.15,
138
+ "atk_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment' is77sOXAu8. ",
139
+ "transfer_ori_acc": 57.74647887323944,
140
+ "transfer_atk_acc": 57.74647887323944
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 67.5,
146
+ "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 61.5,
148
+ "atk_prompt": "At relationship to the mul𝚝iple-choice question on {}, please proviԁe the accurate responding by choosing 'A', 'B', 'C', or 'D'. ",
149
+ "transfer_ori_acc": 46.808510638297875,
150
+ "transfer_atk_acc": 47.16312056737589
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 65.5,
156
+ "origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 61.5,
158
+ "atk_prompt": "As someone well-vesred in {}, please address the muⅼtiple-choice questioո below by selecting 'A', 'B', 'C', or 'D'.",
159
+ "transfer_ori_acc": 46.63120567375886,
160
+ "transfer_atk_acc": 46.09929078014184
161
+ },
162
+ {
163
+ "dataset": "squad_v2",
164
+ "type": "task",
165
+ "origin_acc": 72.75,
166
+ "origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, reply with 'unanswerable'.",
167
+ "atk_acc": 67.93,
168
+ "atk_prompt": "Based on the giveո context, provide the best pоssible answer. If there's no answer available in the contex𝚝, reply with 'unanswerable'.",
169
+ "transfer_ori_acc": 87.04992770387507,
170
+ "transfer_atk_acc": 87.54992770387507
171
+ },
172
+ {
173
+ "dataset": "squad_v2",
174
+ "type": "role",
175
+ "origin_acc": 70.63,
176
+ "origin_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable'.",
177
+ "atk_acc": 68.31,
178
+ "atk_prompt": "As a profcient expert in the given cоntext, search for the most relevant answer. If the anwser cannot be unearthed, respond by saying 'unanswerable'.",
179
+ "transfer_ori_acc": 87.31659437054175,
180
+ "transfer_atk_acc": 87.81659437054175
181
+ },
182
+ {
183
+ "dataset": "iwslt",
184
+ "type": "task",
185
+ "origin_acc": 0.23,
186
+ "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}.",
187
+ "atk_acc": 0.23,
188
+ "atk_prompt": "Pⅼease perform a translating of the givеn sentence, conversion it from {} to {}. ",
189
+ "transfer_ori_acc": 0.22004054628058534,
190
+ "transfer_atk_acc": 0.22116640313338157
191
+ },
192
+ {
193
+ "dataset": "iwslt",
194
+ "type": "role",
195
+ "origin_acc": 0.24,
196
+ "origin_prompt": "Assuming the responsibilities of a professional translator, translate the subsequent text passage from {} to {}.",
197
+ "atk_acc": 0.23,
198
+ "atk_prompt": "Assuminɡ the responsibilities of a professional transla𝚝or, translate the subsequent text passage from {} to {}. ",
199
+ "transfer_ori_acc": 0.22030199520456395,
200
+ "transfer_atk_acc": 0.22054117249828195
201
+ },
202
+ {
203
+ "dataset": "un_multi",
204
+ "type": "task",
205
+ "origin_acc": 0.35,
206
+ "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}.",
207
+ "atk_acc": 0.35,
208
+ "atk_prompt": "Plеase perform a translation of the given sentence, convert it from {} to {}. ",
209
+ "transfer_ori_acc": 0.3076276888167265,
210
+ "transfer_atk_acc": 0.306696536710457
211
+ },
212
+ {
213
+ "dataset": "un_multi",
214
+ "type": "role",
215
+ "origin_acc": 0.35,
216
+ "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}.",
217
+ "atk_acc": 0.34,
218
+ "atk_prompt": "As a eligible translator, convert the given sentence from its orіginal {} language to the tarɡet language {}. ",
219
+ "transfer_ori_acc": 0.31127691224033316,
220
+ "transfer_atk_acc": 0.3058942177709969
221
+ },
222
+ {
223
+ "dataset": "math",
224
+ "type": "task",
225
+ "origin_acc": 23.12,
226
+ "origin_prompt": "Calculate the answer to the following math query about {}.",
227
+ "atk_acc": 20.62,
228
+ "atk_prompt": "Calculate the answer to the 𝚏ollowing math query about {}. ",
229
+ "transfer_ori_acc": 16.25,
230
+ "transfer_atk_acc": 16.875
231
+ },
232
+ {
233
+ "dataset": "math",
234
+ "type": "role",
235
+ "origin_acc": 22.5,
236
+ "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}.",
237
+ "atk_acc": 19.38,
238
+ "atk_prompt": "Since a mathematics istructor, calculate the answer to the following problem related to {}. ",
239
+ "transfer_ori_acc": 17.5,
240
+ "transfer_atk_acc": 17.5
241
+ }
242
+ ]
results_transfer/chatgpt_google-flan-t5-large/textfooler_0_shot.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 79.0,
6
+ "origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable':",
7
+ "atk_acc": 2.0,
8
+ "atk_prompt": "Revising the punishments below and locate whether its dictionary is 'Acceptable' or 'Unacceptable':",
9
+ "transfer_ori_acc": 76.5,
10
+ "transfer_atk_acc": 45.7
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 80.0,
16
+ "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:",
17
+ "atk_acc": 6.0,
18
+ "atk_prompt": "While performing jargon discusses, categories the language of the below sentence as 'acceptable' for mend jargon or 'unacceptable' for unfit grammar:",
19
+ "transfer_ori_acc": 75.5,
20
+ "transfer_atk_acc": 76.2
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 75.0,
26
+ "origin_prompt": "Examine the pair of sentences and decide if their meanings are identical by answering with 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 68.5,
28
+ "atk_prompt": "Examine the pair of sentences and selects if their regard are identical by responding with 'equivalent' or 'not_equivalent'. ",
29
+ "transfer_ori_acc": 82.1078431372549,
30
+ "transfer_atk_acc": 82.1078431372549
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 75.5,
36
+ "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 0.0,
38
+ "atk_prompt": "About your expertise as a semantic appraisal means, evaluate the provided penalty and classify their meanings as 'equivalent' or 'not_equivalent'. ",
39
+ "transfer_ori_acc": 76.47058823529412,
40
+ "transfer_atk_acc": 76.47058823529412
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 77.5,
46
+ "origin_prompt": "Consider the context and question, and indicate if the answer can be logically deduced from the context by responding with 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 0.5,
48
+ "atk_prompt": "Recital the context and items, and specify if the solution can be logically subtracted from the framework by reactions with 'entailment' or 'not_entailment'. ",
49
+ "transfer_ori_acc": 83.39999999999999,
50
+ "transfer_atk_acc": 76.5
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 78.0,
56
+ "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 0.0,
58
+ "atk_prompt": "As a semantic fellows, measuring whether the rendered background supports the answers to the item and chosen 'entailment' or 'not_entailment'. ",
59
+ "transfer_ori_acc": 89.1,
60
+ "transfer_atk_acc": 78.10000000000001
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 80.5,
66
+ "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 0.0,
68
+ "atk_prompt": "Kan these two statement is studied same in sens? Reaction with 'equivalent' or 'not_equivalent'. ",
69
+ "transfer_ori_acc": 87.8,
70
+ "transfer_atk_acc": 59.3
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 80.5,
76
+ "origin_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.",
77
+ "atk_acc": 76.5,
78
+ "atk_prompt": "Functioning as a question similarity evaluation tool, analyse the given questions and decide if they share the same feel, responding with 'equivalent' or 'not_equivalent'. ",
79
+ "transfer_ori_acc": 87.3,
80
+ "transfer_atk_acc": 87.0
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 83.5,
86
+ "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.",
87
+ "atk_acc": 60.5,
88
+ "atk_prompt": "Pose the aftermath two phrases entailment or not_entailment? Reactions me with \"entailment\" or \"not_entailment\", just one adjectives. ",
89
+ "transfer_ori_acc": 88.08664259927798,
90
+ "transfer_atk_acc": 82.31046931407943
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 81.0,
96
+ "origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':",
97
+ "atk_acc": 60.0,
98
+ "atk_prompt": "Because an devices for entailment appraised, revisit the two phrases and identifying if their correlate is 'entailment' or 'not_entailment'. Answered with 'entailment' or 'not_entailment':",
99
+ "transfer_ori_acc": 85.5595667870036,
100
+ "transfer_atk_acc": 85.92057761732852
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 90.5,
106
+ "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment:",
107
+ "atk_acc": 0.0,
108
+ "atk_prompt": "Analysis this assertion and defining whether it ai a 'positive' or 'negative' sentiment: ",
109
+ "transfer_ori_acc": 95.06880733944955,
110
+ "transfer_atk_acc": 95.29816513761467
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 94.0,
116
+ "origin_prompt": "Emulating a sentiment classification system, indicate whether the provided text is 'positive' or 'negative'. Classify:",
117
+ "atk_acc": 4.5,
118
+ "atk_prompt": "Emulating a foreboding classifies regime, explain whether the stipulated laws is 'positive' or 'negative'. Grading: ",
119
+ "transfer_ori_acc": 94.72477064220183,
120
+ "transfer_atk_acc": 91.5137614678899
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 69.01,
126
+ "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 52.11,
128
+ "atk_prompt": "Identify whether the given pair of sentence exposes entailment or not_entailment. Retaliated with 'entailment' or 'not_entailment'.",
129
+ "transfer_ori_acc": 59.154929577464785,
130
+ "transfer_atk_acc": 60.56338028169014
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 67.61,
136
+ "origin_prompt": "Acting as an entailment detection instrument, determine if the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment':",
137
+ "atk_acc": 46.48,
138
+ "atk_prompt": "Acting as an entailment detection instrument, determine if the given torque of sentences depicting entailment or not_entailment. Reactions with 'entailment' or 'not_entailment':",
139
+ "transfer_ori_acc": 59.154929577464785,
140
+ "transfer_atk_acc": 59.154929577464785
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 64.0,
146
+ "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 50.5,
148
+ "atk_prompt": "In relation to the multiple-choice issue on {}, please delivering the clarification rebuttal by selector 'A', 'B', 'C', or 'D'. ",
149
+ "transfer_ori_acc": 44.858156028368796,
150
+ "transfer_atk_acc": 42.02127659574468
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 61.5,
156
+ "origin_prompt": "As a skilled person in the field of {}, please respond to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 54.5,
158
+ "atk_prompt": "As a expert person in the minefields of {}, please reacts to the multiple-choice question by choice 'A', 'B', 'C', or 'D'.",
159
+ "transfer_ori_acc": 44.680851063829785,
160
+ "transfer_atk_acc": 44.50354609929078
161
+ },
162
+ {
163
+ "dataset": "squad_v2",
164
+ "type": "task",
165
+ "origin_acc": 70.01,
166
+ "origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, respond with 'unanswerable'.",
167
+ "atk_acc": 5.68,
168
+ "atk_prompt": "Based on the received backgrounds, provide the nicest unlikely respond. Although there's no riposte available in the context, reactions with 'unanswerable'.",
169
+ "transfer_ori_acc": 87.04992770387507,
170
+ "transfer_atk_acc": 81.35502139314727
171
+ },
172
+ {
173
+ "dataset": "squad_v2",
174
+ "type": "role",
175
+ "origin_acc": 69.17,
176
+ "origin_prompt": "Using your knowledge of the context, identify the best answer to the question. If the context doesn't provide an answer, write 'unanswerable'.",
177
+ "atk_acc": 12.54,
178
+ "atk_prompt": "Using your skill of the background, specify the improved rebuttal to the question. While the context doesn't delivering an reactions, writings 'unanswerable'.",
179
+ "transfer_ori_acc": 87.54992770387507,
180
+ "transfer_atk_acc": 74.81077674890263
181
+ },
182
+ {
183
+ "dataset": "iwslt",
184
+ "type": "task",
185
+ "origin_acc": 0.22,
186
+ "origin_prompt": "Using machine translation, convert the given sentence from {} into the {} language:",
187
+ "atk_acc": 0.0,
188
+ "atk_prompt": "Using typewriter translation, transformation the given sentence from {} into the {} pronunciation:",
189
+ "transfer_ori_acc": 0.13366423747896072,
190
+ "transfer_atk_acc": 0.13708349489640922
191
+ },
192
+ {
193
+ "dataset": "iwslt",
194
+ "type": "role",
195
+ "origin_acc": 0.22,
196
+ "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}:",
197
+ "atk_acc": 0.0,
198
+ "atk_prompt": "Select on the features of a certifiable translators, conducting a translation of the gave sentencing from {} to {}: ",
199
+ "transfer_ori_acc": 0.14266566803921382,
200
+ "transfer_atk_acc": 0.1319452584104338
201
+ },
202
+ {
203
+ "dataset": "un_multi",
204
+ "type": "task",
205
+ "origin_acc": 0.34,
206
+ "origin_prompt": "Translate the subsequent text passage from its original {} language to the {} language:",
207
+ "atk_acc": 0.07,
208
+ "atk_prompt": "Translate the sequential version passage from its primitive {} grammar to the {} pronunciation:",
209
+ "transfer_ori_acc": 0.2679607664026083,
210
+ "transfer_atk_acc": 0.26331875982607134
211
+ },
212
+ {
213
+ "dataset": "un_multi",
214
+ "type": "role",
215
+ "origin_acc": 0.34,
216
+ "origin_prompt": "While acting as an experienced language expert, translate the following text from {} to {}:",
217
+ "atk_acc": 0.27,
218
+ "atk_prompt": "While acting as an suffered dialect expert, translate the below laws from {} to {}: ",
219
+ "transfer_ori_acc": 0.26489202618745367,
220
+ "transfer_atk_acc": 0.2534345391549418
221
+ },
222
+ {
223
+ "dataset": "math",
224
+ "type": "task",
225
+ "origin_acc": 6.88,
226
+ "origin_prompt": "Obtain the result for this math question regarding {}:",
227
+ "atk_acc": 0.0,
228
+ "atk_prompt": "Obtain the achievements for this math question respecting {}:",
229
+ "transfer_ori_acc": 14.374999999999998,
230
+ "transfer_atk_acc": 14.374999999999998
231
+ },
232
+ {
233
+ "dataset": "math",
234
+ "type": "role",
235
+ "origin_acc": 5.0,
236
+ "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}:",
237
+ "atk_acc": 0.0,
238
+ "atk_prompt": "As a mathematics instructor, calculate the rebuttal to the following problem related to {}: ",
239
+ "transfer_ori_acc": 11.875,
240
+ "transfer_atk_acc": 11.25
241
+ }
242
+ ]
results_transfer/chatgpt_google-flan-t5-large/textfooler_3_shot.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 80.5,
6
+ "origin_prompt": "Examine the sentence and decide if its grammar is 'Acceptable' or 'Unacceptable'.",
7
+ "atk_acc": 39.5,
8
+ "atk_prompt": "Exams the sentence and defining if its jargon is 'Acceptable' or 'Unacceptable'. ",
9
+ "transfer_ori_acc": 76.3,
10
+ "transfer_atk_acc": 74.4
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 81.0,
16
+ "origin_prompt": "In the role of a syntax analyzer, examine the grammar of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones.",
17
+ "atk_acc": 76.0,
18
+ "atk_prompt": "Across the role of a syntax analyzer, examine the dictionary of the provided chastisement and decide if it is adequate, answered with 'acceptable' for grammatically correct sentences or 'unacceptable' for deceptive ones. ",
19
+ "transfer_ori_acc": 74.2,
20
+ "transfer_atk_acc": 73.3
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 73.0,
26
+ "origin_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 59.0,
28
+ "atk_prompt": "Appraisal if the two accorded sentences enjoys equivalent connotation by choice 'equivalent' or 'not_equivalent'. ",
29
+ "transfer_ori_acc": 82.84313725490196,
30
+ "transfer_atk_acc": 83.08823529411765
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 71.5,
36
+ "origin_prompt": "In the role of a textual comparison specialist, examine the given sentences and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 57.0,
38
+ "atk_prompt": "De the role of a textual comparisons scholar, revisited the given sentences and decide if they exchange the same feel, responding with 'equivalent' or 'not_equivalent'. ",
39
+ "transfer_ori_acc": 79.65686274509804,
40
+ "transfer_atk_acc": 79.16666666666666
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 84.5,
46
+ "origin_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 63.5,
48
+ "atk_prompt": "Discuss the background and question, and determine if the framework intuitively presupposes the answer by akhtar 'entailment' or 'not_entailment'. ",
49
+ "transfer_ori_acc": 86.5,
50
+ "transfer_atk_acc": 80.30000000000001
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 85.5,
56
+ "origin_prompt": "As a textual analyst, examine if the given context logically implies the answer to the question and indicate your decision with 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 68.5,
58
+ "atk_prompt": "Iike a textual analyst, analyze if the submitted regard rationally implies the replies to the issuing and indicates your clout with 'entailment' or 'not_entailment'. ",
59
+ "transfer_ori_acc": 88.8,
60
+ "transfer_atk_acc": 86.1
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 83.5,
66
+ "origin_prompt": "Determine if the given pair of statements can be considered the same by responding with 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 80.0,
68
+ "atk_prompt": "Determine if the given pair of representations can constitute felt the same by responding with 'equivalent' or 'not_equivalent'. ",
69
+ "transfer_ori_acc": 86.8,
70
+ "transfer_atk_acc": 86.9
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 83.0,
76
+ "origin_prompt": "Acting as a question equivalence instrument, determine if the provided questions are equivalent in meaning, answering with 'equivalent' for similar questions or 'not_equivalent' for dissimilar ones.",
77
+ "atk_acc": 80.0,
78
+ "atk_prompt": "Interim as a issues equivalence instrument, determine if the provided aspects are equivalent in meaning, answering with 'equivalent' for selfsame questions or 'not_equivalent' for dissimilar ones. ",
79
+ "transfer_ori_acc": 86.5,
80
+ "transfer_atk_acc": 86.4
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 84.5,
86
+ "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.",
87
+ "atk_acc": 79.5,
88
+ "atk_prompt": "Do the coitus between the given sentences comprised entailment or not_entailment? Meet with 'entailment' or 'not_entailment'. ",
89
+ "transfer_ori_acc": 83.75451263537906,
90
+ "transfer_atk_acc": 81.94945848375451
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 83.0,
96
+ "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment'.",
97
+ "atk_acc": 79.5,
98
+ "atk_prompt": "As an entailment identification system, revisited the connection between the following sentences and respondent with 'entailment' or 'not_entailment'. ",
99
+ "transfer_ori_acc": 82.67148014440433,
100
+ "transfer_atk_acc": 83.03249097472924
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 96.0,
106
+ "origin_prompt": "Read the provided excerpt and choose between 'positive' and 'negative' to describe its sentiment.",
107
+ "atk_acc": 85.0,
108
+ "atk_prompt": "Lu the rendered excerpt and withheld between 'positive' and 'negative' to describe its foreboding. ",
109
+ "transfer_ori_acc": 94.72477064220183,
110
+ "transfer_atk_acc": 95.18348623853211
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 97.5,
116
+ "origin_prompt": "Emulating a sentiment classification system, indicate whether the provided text is 'positive' or 'negative'.",
117
+ "atk_acc": 91.5,
118
+ "atk_prompt": "Emulating a ambience classification mechanisms, describe whether the presented writings is 'positive' or 'negative'. ",
119
+ "transfer_ori_acc": 94.95412844036697,
120
+ "transfer_atk_acc": 95.29816513761467
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 69.01,
126
+ "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 59.15,
128
+ "atk_prompt": "Indicate if the connection between the subsequently sentences is 'entailment' or 'not_entailment'. ",
129
+ "transfer_ori_acc": 64.7887323943662,
130
+ "transfer_atk_acc": 61.97183098591549
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 66.2,
136
+ "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.",
137
+ "atk_acc": 57.75,
138
+ "atk_prompt": "In the role of a semantic contacts analyzer, analysed the connecting between the given sentences and choose if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'. ",
139
+ "transfer_ori_acc": 56.33802816901409,
140
+ "transfer_atk_acc": 54.929577464788736
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 67.0,
146
+ "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 60.0,
148
+ "atk_prompt": "Asking reactions to the multiple-choice question about {} by selecting the appropriate reacts: 'A', 'B', 'C', or 'D'. ",
149
+ "transfer_ori_acc": 45.21276595744681,
150
+ "transfer_atk_acc": 43.97163120567376
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 66.0,
156
+ "origin_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 61.5,
158
+ "atk_prompt": "From your acquaintances of {}, addressing the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.",
159
+ "transfer_ori_acc": 46.09929078014184,
160
+ "transfer_atk_acc": 45.56737588652482
161
+ },
162
+ {
163
+ "dataset": "squad_v2",
164
+ "type": "task",
165
+ "origin_acc": 72.8,
166
+ "origin_prompt": "Using the context, determine the most suitable answer. If the context doesn't contain the answer, write 'unanswerable'.",
167
+ "atk_acc": 63.67,
168
+ "atk_prompt": "Using the context, identifies the most suitable answer. Although the context doesn't comprise the meet, editor 'unanswerable'.",
169
+ "transfer_ori_acc": 87.04992770387507,
170
+ "transfer_atk_acc": 87.31659437054175
171
+ },
172
+ {
173
+ "dataset": "squad_v2",
174
+ "type": "role",
175
+ "origin_acc": 71.93,
176
+ "origin_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable'.",
177
+ "atk_acc": 64.13,
178
+ "atk_prompt": "As a knowledge expert in the given context, find for the most relevant answering. Unless the answer cannot exist found, respond by told 'unanswerable'.",
179
+ "transfer_ori_acc": 87.31659437054175,
180
+ "transfer_atk_acc": 87.51356406751144
181
+ },
182
+ {
183
+ "dataset": "iwslt",
184
+ "type": "task",
185
+ "origin_acc": 0.23,
186
+ "origin_prompt": "Given the sentence below, perform a machine translation from {} to {}.",
187
+ "atk_acc": 0.18,
188
+ "atk_prompt": "Attributed the sentence below, meet a equipments interpreters from {} to {}. ",
189
+ "transfer_ori_acc": 0.21292285800980326,
190
+ "transfer_atk_acc": 0.2153933980722541
191
+ },
192
+ {
193
+ "dataset": "iwslt",
194
+ "type": "role",
195
+ "origin_acc": 0.24,
196
+ "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}.",
197
+ "atk_acc": 0.23,
198
+ "atk_prompt": "Of the features of a language expert, performed a machine translation for the presented sentence, variation it from {} to {}. ",
199
+ "transfer_ori_acc": 0.22070346732323043,
200
+ "transfer_atk_acc": 0.21889644783553872
201
+ },
202
+ {
203
+ "dataset": "un_multi",
204
+ "type": "task",
205
+ "origin_acc": 0.35,
206
+ "origin_prompt": "Translate the following text from the source language {} to the target language {}.",
207
+ "atk_acc": 0.33,
208
+ "atk_prompt": "Translate the following texts from the source lingo {} to the millennium phrasing {}. ",
209
+ "transfer_ori_acc": 0.3090347352409074,
210
+ "transfer_atk_acc": 0.30701960419660657
211
+ },
212
+ {
213
+ "dataset": "un_multi",
214
+ "type": "role",
215
+ "origin_acc": 0.35,
216
+ "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}.",
217
+ "atk_acc": 0.34,
218
+ "atk_prompt": "As a nuanced translation, metamorphosis the gave sentence from its original {} terminology to the aspirations grammar {}. ",
219
+ "transfer_ori_acc": 0.31127691224033316,
220
+ "transfer_atk_acc": 0.30631346636342727
221
+ },
222
+ {
223
+ "dataset": "math",
224
+ "type": "task",
225
+ "origin_acc": 23.75,
226
+ "origin_prompt": "Compute the result of this math task concerning {}.",
227
+ "atk_acc": 1.88,
228
+ "atk_prompt": "Estimate the accomplishments of this math task concerning {}. ",
229
+ "transfer_ori_acc": 16.25,
230
+ "transfer_atk_acc": 15.625
231
+ },
232
+ {
233
+ "dataset": "math",
234
+ "type": "role",
235
+ "origin_acc": 22.5,
236
+ "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}.",
237
+ "atk_acc": 1.88,
238
+ "atk_prompt": "As a computations instructor, derive the rebuttal to the following hassles links to {}. ",
239
+ "transfer_ori_acc": 17.5,
240
+ "transfer_atk_acc": 17.5
241
+ }
242
+ ]
results_transfer/chatgpt_google-flan-t5-large/translation_0_shot.json ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "translation",
5
+ "origin_acc": 79.5,
6
+ "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable':",
7
+ "atk_acc": 25.0,
8
+ "atk_prompt": "Classify the grammar of a given sentence into two categories, 'acceptable' or 'unacceptable', and judge its accuracy.",
9
+ "transfer_ori_acc": 76.3,
10
+ "transfer_atk_acc": 75.3
11
+ },
12
+ {
13
+ "dataset": "mrpc",
14
+ "type": "translation",
15
+ "origin_acc": 76.5,
16
+ "origin_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent'.",
17
+ "atk_acc": 7.0,
18
+ "atk_prompt": "As an expert in semantic comparison, assess the similarity between the following two sentences and determine whether they are 'equivalent' or 'not_equivalent'.",
19
+ "transfer_ori_acc": 80.63725490196079,
20
+ "transfer_atk_acc": 83.82352941176471
21
+ },
22
+ {
23
+ "dataset": "qnli",
24
+ "type": "translation",
25
+ "origin_acc": 79.5,
26
+ "origin_prompt": "Assess if the answer to the question can be logically concluded from the provided context by choosing 'entailment' or 'not_entailment'.",
27
+ "atk_acc": 56.5,
28
+ "atk_prompt": "Check the text link to the question and answer 'entailment' or 'not_entailment', depending on your language skills.",
29
+ "transfer_ori_acc": 84.8,
30
+ "transfer_atk_acc": 56.3
31
+ },
32
+ {
33
+ "dataset": "qqp",
34
+ "type": "translation",
35
+ "origin_acc": 84.0,
36
+ "origin_prompt": "Determine if the given pair of statements can be considered the same by responding with 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 3.0,
38
+ "atk_prompt": "As a tool for determining the equivalence of problems, the problems are examined and their similarity is classified as 'equivalent' or 'not_equivalent'.",
39
+ "transfer_ori_acc": 87.6,
40
+ "transfer_atk_acc": 72.0
41
+ },
42
+ {
43
+ "dataset": "rte",
44
+ "type": "translation",
45
+ "origin_acc": 84.0,
46
+ "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 39.0,
48
+ "atk_prompt": "Determine whether the given sentences have a cause-effect relationship as an engagement analysis tool and categorize them as 'entailment' or 'not_entailment'.",
49
+ "transfer_ori_acc": 84.11552346570397,
50
+ "transfer_atk_acc": 85.5595667870036
51
+ },
52
+ {
53
+ "dataset": "sst2",
54
+ "type": "translation",
55
+ "origin_acc": 96.0,
56
+ "origin_prompt": "Read the provided excerpt and choose between 'positive' and 'negative' to describe its sentiment:",
57
+ "atk_acc": 29.5,
58
+ "atk_prompt": "In the context of textual analysis, what classification is this phrase between 'positive' and 'negative'?",
59
+ "transfer_ori_acc": 94.83944954128441,
60
+ "transfer_atk_acc": 94.15137614678899
61
+ },
62
+ {
63
+ "dataset": "wnli",
64
+ "type": "translation",
65
+ "origin_acc": 67.61,
66
+ "origin_prompt": "Examine the pair of sentences and determine if they exhibit entailment or not_entailment. Answer with either 'entailment' or 'not_entailment'.",
67
+ "atk_acc": 26.76,
68
+ "atk_prompt": "Using implication analysis, determine whether the given sentences have a cause-effect relationship and categorize them as 'entailment' or 'not_entailment'.",
69
+ "transfer_ori_acc": 61.97183098591549,
70
+ "transfer_atk_acc": 59.154929577464785
71
+ },
72
+ {
73
+ "dataset": "mmlu",
74
+ "type": "translation",
75
+ "origin_acc": 64.0,
76
+ "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.",
77
+ "atk_acc": 49.5,
78
+ "atk_prompt": "Choose the correct answer to the multiple-choice question below. Is '{}' an 'A', 'B', 'C', or 'D'.",
79
+ "transfer_ori_acc": 44.858156028368796,
80
+ "transfer_atk_acc": 43.262411347517734
81
+ },
82
+ {
83
+ "dataset": "squad_v2",
84
+ "type": "translation",
85
+ "origin_acc": 72.37,
86
+ "origin_prompt": "Discover the best answer based on the context. If the context doesn't include an answer, respond with 'unanswerable'.",
87
+ "atk_acc": 46.08,
88
+ "atk_prompt": "Please tell me what your question is about. If there is no context in which you can provide an answer, respond with 'unanswerable'.",
89
+ "transfer_ori_acc": 87.54992770387507,
90
+ "transfer_atk_acc": 87.42265497660236
91
+ },
92
+ {
93
+ "dataset": "iwslt",
94
+ "type": "translation",
95
+ "origin_acc": 0.22,
96
+ "origin_prompt": "Using machine translation, convert the given sentence from {} into the {} language:",
97
+ "atk_acc": 0.11,
98
+ "atk_prompt": "Please convert the sentence below to {}, and then translate it to {},",
99
+ "transfer_ori_acc": 0.13366423747896072,
100
+ "transfer_atk_acc": 0.13525124600739777
101
+ },
102
+ {
103
+ "dataset": "un_multi",
104
+ "type": "translation",
105
+ "origin_acc": 0.34,
106
+ "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}:",
107
+ "atk_acc": 0.16,
108
+ "atk_prompt": "Please convert the sentence below to {}, and then translate it to {},",
109
+ "transfer_ori_acc": 0.26814405340158265,
110
+ "transfer_atk_acc": 0.2698370828666449
111
+ },
112
+ {
113
+ "dataset": "math",
114
+ "type": "translation",
115
+ "origin_acc": 6.88,
116
+ "origin_prompt": "Calculate the answer to the following math query about {}:",
117
+ "atk_acc": 3.75,
118
+ "atk_prompt": "Use {} to solve the following mathematical problem.",
119
+ "transfer_ori_acc": 13.125,
120
+ "transfer_atk_acc": 15.625
121
+ }
122
+ ]
results_transfer/chatgpt_google-flan-t5-large/translation_3_shot.json ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "translation",
5
+ "origin_acc": 81.5,
6
+ "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable'.",
7
+ "atk_acc": 68.5,
8
+ "atk_prompt": "Classify the grammar of a given sentence as 'acceptable' or 'unacceptable', and judge its accuracy.",
9
+ "transfer_ori_acc": 75.2,
10
+ "transfer_atk_acc": 75.8
11
+ },
12
+ {
13
+ "dataset": "mrpc",
14
+ "type": "translation",
15
+ "origin_acc": 74.5,
16
+ "origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.",
17
+ "atk_acc": 65.0,
18
+ "atk_prompt": "Based on my experience in semantic analysis, classify the following two sentences between 'equivalent' or 'not_equivalent'.",
19
+ "transfer_ori_acc": 80.3921568627451,
20
+ "transfer_atk_acc": 81.61764705882352
21
+ },
22
+ {
23
+ "dataset": "qnli",
24
+ "type": "translation",
25
+ "origin_acc": 86.0,
26
+ "origin_prompt": "As a textual analyst, examine if the given context logically implies the answer to the question and indicate your decision with 'entailment' or 'not_entailment'.",
27
+ "atk_acc": 71.5,
28
+ "atk_prompt": "Check the text link to the question and answer 'entailment' or 'not_entailment', depending on your language skills.",
29
+ "transfer_ori_acc": 88.8,
30
+ "transfer_atk_acc": 56.699999999999996
31
+ },
32
+ {
33
+ "dataset": "qqp",
34
+ "type": "translation",
35
+ "origin_acc": 84.0,
36
+ "origin_prompt": "In the capacity of a question assessment system, indicate if the meaning of the provided questions is the same, responding with 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 80.5,
38
+ "atk_prompt": "By analyzing the similarity of these problems, they are divided into categories of 'equivalent' or 'not_equivalent'.",
39
+ "transfer_ori_acc": 86.1,
40
+ "transfer_atk_acc": 87.5
41
+ },
42
+ {
43
+ "dataset": "rte",
44
+ "type": "translation",
45
+ "origin_acc": 84.5,
46
+ "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 80.0,
48
+ "atk_prompt": "Determine whether the given sentences have a cause-effect relationship as an engagement analysis tool and categorize them as 'entailment' or 'not_entailment'.",
49
+ "transfer_ori_acc": 83.75451263537906,
50
+ "transfer_atk_acc": 84.47653429602889
51
+ },
52
+ {
53
+ "dataset": "sst2",
54
+ "type": "translation",
55
+ "origin_acc": 97.5,
56
+ "origin_prompt": "In the capacity of a sentiment classifier, decide whether the given quote is 'positive' or 'negative'.",
57
+ "atk_acc": 68.5,
58
+ "atk_prompt": "In the context of textual analysis, what classification is this phrase between 'positive' and 'negative'?",
59
+ "transfer_ori_acc": 94.95412844036697,
60
+ "transfer_atk_acc": 94.61009174311926
61
+ },
62
+ {
63
+ "dataset": "wnli",
64
+ "type": "translation",
65
+ "origin_acc": 74.65,
66
+ "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.",
67
+ "atk_acc": 59.15,
68
+ "atk_prompt": "Determinate the semantic connections of a given sentence and classify it as 'entailment' or 'not_entailment'.",
69
+ "transfer_ori_acc": 67.6056338028169,
70
+ "transfer_atk_acc": 54.929577464788736
71
+ },
72
+ {
73
+ "dataset": "mmlu",
74
+ "type": "translation",
75
+ "origin_acc": 67.0,
76
+ "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.",
77
+ "atk_acc": 63.0,
78
+ "atk_prompt": "Rate your understanding of the multiple-choice question on {}. Choose the correct answer from options 'A', 'B', 'C' or 'D'.",
79
+ "transfer_ori_acc": 45.21276595744681,
80
+ "transfer_atk_acc": 45.56737588652482
81
+ },
82
+ {
83
+ "dataset": "squad_v2",
84
+ "type": "translation",
85
+ "origin_acc": 74.58,
86
+ "origin_prompt": "Please derive the most fitting answer from the context. If there isn't an answer in the context, use 'unanswerable'.",
87
+ "atk_acc": 67.46,
88
+ "atk_prompt": "Please provide additional information about the facts mentioned in this sentence. If no information is available, respond with 'unanswerable'.",
89
+ "transfer_ori_acc": 87.04992770387507,
90
+ "transfer_atk_acc": 86.92265497660236
91
+ },
92
+ {
93
+ "dataset": "iwslt",
94
+ "type": "translation",
95
+ "origin_acc": 0.24,
96
+ "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.",
97
+ "atk_acc": 0.17,
98
+ "atk_prompt": "Please translate the sentences below into {}, then {}.",
99
+ "transfer_ori_acc": 0.2240211126734209,
100
+ "transfer_atk_acc": 0.20150103267232944
101
+ },
102
+ {
103
+ "dataset": "un_multi",
104
+ "type": "translation",
105
+ "origin_acc": 0.36,
106
+ "origin_prompt": "Translate the following text from the source language {} to the target language {}.",
107
+ "atk_acc": 0.25,
108
+ "atk_prompt": "Please translate the following sentences into {}, then {}.",
109
+ "transfer_ori_acc": 0.3090347352409074,
110
+ "transfer_atk_acc": 0.3050833075162871
111
+ },
112
+ {
113
+ "dataset": "math",
114
+ "type": "translation",
115
+ "origin_acc": 23.12,
116
+ "origin_prompt": "Compute the result of this math task concerning {}.",
117
+ "atk_acc": 18.75,
118
+ "atk_prompt": "Please calculate the answers related to {}.",
119
+ "transfer_ori_acc": 16.25,
120
+ "transfer_atk_acc": 16.875
121
+ }
122
+ ]
results_transfer/chatgpt_google-flan-ul2/bertattack_0_shot.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 78.5,
6
+ "origin_prompt": "Assess the grammatical structure of the given sentence and classify it as 'Acceptable' or 'Unacceptable':",
7
+ "atk_acc": 0.0,
8
+ "atk_prompt": "what the grammatical structure of the given sentence and consider it as 'Acceptable' or 'Unacceptable':",
9
+ "transfer_ori_acc": 85.39999999999999,
10
+ "transfer_atk_acc": 85.5
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 80.0,
16
+ "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:",
17
+ "atk_acc": 0.0,
18
+ "atk_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for so grammar or 'unacceptable' for abnormal grammar:",
19
+ "transfer_ori_acc": 86.0,
20
+ "transfer_atk_acc": 85.5
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 75.5,
26
+ "origin_prompt": "Examine the pair of sentences and decide if their meanings are identical by answering with 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 56.0,
28
+ "atk_prompt": "Examine the pairs of pronouns and evaluate if their meanings are different by answering with 'equivalent' or 'not_equivalent'. ",
29
+ "transfer_ori_acc": 86.02941176470588,
30
+ "transfer_atk_acc": 84.06862745098039
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 75.5,
36
+ "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 0.0,
38
+ "atk_prompt": "In your capacity as a semantic assessment tool, evaluate the provided tense and classify their constituents as 'equivalent' or 'not_equivalent'. ",
39
+ "transfer_ori_acc": 83.33333333333334,
40
+ "transfer_atk_acc": 83.08823529411765
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 79.0,
46
+ "origin_prompt": "Assess if the answer to the question can be logically concluded from the provided context by choosing 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 0.0,
48
+ "atk_prompt": "and if the answer to the question can be logically concluded from the provided context by avoiding 'entailment' or 'not_entailment'. ",
49
+ "transfer_ori_acc": 94.0,
50
+ "transfer_atk_acc": 77.3
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 79.5,
56
+ "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 0.0,
58
+ "atk_prompt": "As a textual inference expert, analysis if the answer to the question can be deduced from the provided citation and define 'entailment' or 'not_entailment'. ",
59
+ "transfer_ori_acc": 93.30000000000001,
60
+ "transfer_atk_acc": 94.3
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 79.5,
66
+ "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 1.0,
68
+ "atk_prompt": "Can these two constructions be considered equal in meaning? object with 'equivalent' or 'not_equivalent'. ",
69
+ "transfer_ori_acc": 87.4,
70
+ "transfer_atk_acc": 87.1
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 82.5,
76
+ "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.",
77
+ "atk_acc": 0.0,
78
+ "atk_prompt": "give an table for category comparison evaluation, consider the questions and determine if their concept is the same, responding with 'equivalent' for similarity questions or 'not_equivalent' for different knowledge. ",
79
+ "transfer_ori_acc": 88.3,
80
+ "transfer_atk_acc": 89.2
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 82.5,
86
+ "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.",
87
+ "atk_acc": 55.5,
88
+ "atk_prompt": "determine if the following singular of sentence indicating entailment or not_entailment. contrast with 'entailment' or 'not_entailment'.",
89
+ "transfer_ori_acc": 93.50180505415162,
90
+ "transfer_atk_acc": 89.53068592057761
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 81.5,
96
+ "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':",
97
+ "atk_acc": 37.5,
98
+ "atk_prompt": "at as an entailment classifier, know which the given type of verb displays entailment or not_entailment. object with 'entailment' or 'not_entailment':",
99
+ "transfer_ori_acc": 94.58483754512635,
100
+ "transfer_atk_acc": 90.25270758122743
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 90.0,
106
+ "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment:",
107
+ "atk_acc": 0.0,
108
+ "atk_prompt": "what this assertion and are whether it has a 'positive' or 'negative' sentiment: ",
109
+ "transfer_ori_acc": 96.55963302752293,
110
+ "transfer_atk_acc": 96.44495412844036
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 93.0,
116
+ "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:",
117
+ "atk_acc": 76.0,
118
+ "atk_prompt": "from as a sentiments identification tool, assess if the same word is 'positive' or 'negative'. function: ",
119
+ "transfer_ori_acc": 95.9862385321101,
120
+ "transfer_atk_acc": 95.52752293577981
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 69.01,
126
+ "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 47.89,
128
+ "atk_prompt": "Identify whether the given individual of verbs demonstrates entailment or not_entailment. have with 'entailment' or 'not_entailment'.",
129
+ "transfer_ori_acc": 78.87323943661971,
130
+ "transfer_atk_acc": 71.83098591549296
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 60.56,
136
+ "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':",
137
+ "atk_acc": 47.89,
138
+ "atk_prompt": "Working as an entailment classifier, identify whether the given corpus of verbs displays entailment or not_entailment. act with 'entailment' or 'not_entailment':",
139
+ "transfer_ori_acc": 80.28169014084507,
140
+ "transfer_atk_acc": 69.01408450704226
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 63.5,
146
+ "origin_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 14.5,
148
+ "atk_prompt": "1 the first multiple-choice question on {}, remove the following answer from the options 'A', 'B', 'C', or 'D'. ",
149
+ "transfer_ori_acc": 52.304964539007095,
150
+ "transfer_atk_acc": 15.957446808510639
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 62.0,
156
+ "origin_prompt": "Drawing upon your mastery of {}, please answer the multiple-choice question by selecting the correct option from 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 1.0,
158
+ "atk_prompt": "Drawing upon your mastery of {}, simply question the multiple-selecting question by selecting the wrong color from 'A', 'B', 'C', or 'D'.",
159
+ "transfer_ori_acc": 52.659574468085104,
160
+ "transfer_atk_acc": 14.893617021276595
161
+ },
162
+ {
163
+ "dataset": "squad_v2",
164
+ "type": "task",
165
+ "origin_acc": 72.47,
166
+ "origin_prompt": "Discover the best answer based on the context. If the context doesn't include an answer, respond with 'unanswerable'.",
167
+ "atk_acc": 2.76,
168
+ "atk_prompt": "challenge the best answer based on the context. however the context doesn't include an answer, play with 'unanswerable'.",
169
+ "transfer_ori_acc": 89.38229949874687,
170
+ "transfer_atk_acc": 89.88229949874687
171
+ },
172
+ {
173
+ "dataset": "squad_v2",
174
+ "type": "role",
175
+ "origin_acc": 65.63,
176
+ "origin_prompt": "Drawing on your expertise in the context, provide the most precise answer. If the answer is not in the context, respond with 'unanswerable'.",
177
+ "atk_acc": 1.86,
178
+ "atk_prompt": "Drawing on your technique in the frame, offer the most precise answer. saying the answer is not in the context, play with 'unanswerable'.",
179
+ "transfer_ori_acc": 89.48229949874687,
180
+ "transfer_atk_acc": 89.98229949874687
181
+ },
182
+ {
183
+ "dataset": "iwslt",
184
+ "type": "task",
185
+ "origin_acc": 0.22,
186
+ "origin_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context:",
187
+ "atk_acc": 0.08,
188
+ "atk_prompt": "Translate the designated sentence from {} to {} while defending the assumed meaning and context:",
189
+ "transfer_ori_acc": 0.1697518407093131,
190
+ "transfer_atk_acc": 0.17006977668295734
191
+ },
192
+ {
193
+ "dataset": "iwslt",
194
+ "type": "role",
195
+ "origin_acc": 0.22,
196
+ "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}:",
197
+ "atk_acc": 0.01,
198
+ "atk_prompt": "who on the role of a certified translated, request a translator of the defined sentence from {} to {}: ",
199
+ "transfer_ori_acc": 0.1727120569025014,
200
+ "transfer_atk_acc": 0.16908374849908425
201
+ },
202
+ {
203
+ "dataset": "un_multi",
204
+ "type": "task",
205
+ "origin_acc": 0.34,
206
+ "origin_prompt": "Translate the following text from the source language {} to the target language {}:",
207
+ "atk_acc": 0.18,
208
+ "atk_prompt": "Translate the by text from the source language {} to the english example {}:",
209
+ "transfer_ori_acc": 0.30893486445367974,
210
+ "transfer_atk_acc": 0.31345451169084315
211
+ },
212
+ {
213
+ "dataset": "un_multi",
214
+ "type": "role",
215
+ "origin_acc": 0.34,
216
+ "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}:",
217
+ "atk_acc": 0.08,
218
+ "atk_prompt": "of the role of a language expertise, define a machine and for the presented sentence, changing it from {} to {}: ",
219
+ "transfer_ori_acc": 0.31256243540663714,
220
+ "transfer_atk_acc": 0.3081120174260553
221
+ },
222
+ {
223
+ "dataset": "math",
224
+ "type": "task",
225
+ "origin_acc": 6.25,
226
+ "origin_prompt": "Resolve the following mathematical question associated with {}:",
227
+ "atk_acc": 0.62,
228
+ "atk_prompt": "Resolve the following mathematical answer along with {}:",
229
+ "transfer_ori_acc": 14.374999999999998,
230
+ "transfer_atk_acc": 13.750000000000002
231
+ },
232
+ {
233
+ "dataset": "math",
234
+ "type": "role",
235
+ "origin_acc": 5.0,
236
+ "origin_prompt": "In the position of a mathematics coach, figure out the answer to this math challenge involving {}:",
237
+ "atk_acc": 0.0,
238
+ "atk_prompt": "In the position of a mathematics coach, develop out the theme to this math challenge involving {}: ",
239
+ "transfer_ori_acc": 10.625,
240
+ "transfer_atk_acc": 3.75
241
+ }
242
+ ]
results_transfer/chatgpt_google-flan-ul2/bertattack_3_shot.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 81.0,
6
+ "origin_prompt": "Analyze the provided sentence and classify its grammatical correctness as 'Acceptable' or 'Unacceptable'.",
7
+ "atk_acc": 59.0,
8
+ "atk_prompt": "Analyze the reported speech and classify its inclusive correcting as 'Acceptable' or 'Unacceptable'. ",
9
+ "transfer_ori_acc": 86.0,
10
+ "transfer_atk_acc": 78.0
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 81.0,
16
+ "origin_prompt": "In the role of a syntax analyzer, examine the grammar of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones.",
17
+ "atk_acc": 75.5,
18
+ "atk_prompt": "using the art of a level analyzeg, examine the grammar of the provided sentence and decide if it is genuine, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones. ",
19
+ "transfer_ori_acc": 86.5,
20
+ "transfer_atk_acc": 86.3
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 74.0,
26
+ "origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 47.5,
28
+ "atk_prompt": "Compare the following sentences and determine if they lack the same aspect marking by respectively with 'equivalent' or 'not_equivalent'. ",
29
+ "transfer_ori_acc": 86.27450980392157,
30
+ "transfer_atk_acc": 72.05882352941177
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 71.5,
36
+ "origin_prompt": "In the role of a textual comparison specialist, examine the given sentences and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 47.0,
38
+ "atk_prompt": "have the role of a textual comparison academic, examine the given sentences and determine if they share the same length, responding with 'equivalent' or 'not_equivalent'. ",
39
+ "transfer_ori_acc": 87.00980392156863,
40
+ "transfer_atk_acc": 86.02941176470588
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 84.0,
46
+ "origin_prompt": "Consider the context and question, and indicate if the answer can be logically deduced from the context by responding with 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 71.0,
48
+ "atk_prompt": "put the contexts and question, and be if the answer can be logically deduced from the question by stating with 'entailment' or 'not_entailment'. ",
49
+ "transfer_ori_acc": 92.9,
50
+ "transfer_atk_acc": 93.30000000000001
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 84.0,
56
+ "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 70.0,
58
+ "atk_prompt": "As a structural inference expert, analyze if the answer to the challenge can be deduced from the query audience and select 'entailment' or 'not_entailment'. ",
59
+ "transfer_ori_acc": 93.0,
60
+ "transfer_atk_acc": 94.1
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 82.5,
66
+ "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".",
67
+ "atk_acc": 78.5,
68
+ "atk_prompt": "Are the following versus answers equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\". ",
69
+ "transfer_ori_acc": 88.5,
70
+ "transfer_atk_acc": 88.5
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 83.5,
76
+ "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.",
77
+ "atk_acc": 78.0,
78
+ "atk_prompt": "and an instrument for question comparison analysis, consider the questions and establish if their topic is the same, responding with 'equivalent' for matching responses or 'not_equivalent' for between questions. ",
79
+ "transfer_ori_acc": 89.0,
80
+ "transfer_atk_acc": 90.7
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 85.0,
86
+ "origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.",
87
+ "atk_acc": 59.5,
88
+ "atk_prompt": "write the second statements and categorize their relationship as neither 'entailment' or 'not_entailment'. ",
89
+ "transfer_ori_acc": 92.7797833935018,
90
+ "transfer_atk_acc": 92.7797833935018
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 83.5,
96
+ "origin_prompt": "While performing entailment analysis, classify the relationship between the provided sentences as 'entailment' or 'not_entailment'.",
97
+ "atk_acc": 79.0,
98
+ "atk_prompt": "was performing entailment analysis, classify the relationship between the first phrase as 'entailment' or 'not_entailment'. ",
99
+ "transfer_ori_acc": 93.14079422382672,
100
+ "transfer_atk_acc": 93.14079422382672
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 95.5,
106
+ "origin_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative'.",
107
+ "atk_acc": 94.5,
108
+ "atk_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative' Q47Jmd4lMV. ",
109
+ "transfer_ori_acc": 92.77522935779817,
110
+ "transfer_atk_acc": 92.66055045871559
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 97.5,
116
+ "origin_prompt": "In the capacity of a sentiment classifier, decide whether the given quote is 'positive' or 'negative'.",
117
+ "atk_acc": 96.5,
118
+ "atk_prompt": "In the capacity of a sentiment classifier, decide whether the given quote is 'positive' or 'negative' 9Q3S1F94fE. ",
119
+ "transfer_ori_acc": 96.44495412844036,
120
+ "transfer_atk_acc": 96.44495412844036
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 69.01,
126
+ "origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 59.15,
128
+ "atk_prompt": "examine the paired actions and categorize their relationship as is 'entailment' or 'not_entailment'. ",
129
+ "transfer_ori_acc": 77.46478873239437,
130
+ "transfer_atk_acc": 76.05633802816901
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 66.2,
136
+ "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.",
137
+ "atk_acc": 56.34,
138
+ "atk_prompt": "In the role of a semantic relationship analyzer, define the connection between the given links and decide if they display entailment or not_entailment. Answer with 'entailment' or 'not_entailment'. ",
139
+ "transfer_ori_acc": 76.05633802816901,
140
+ "transfer_atk_acc": 76.05633802816901
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 66.5,
146
+ "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 11.5,
148
+ "atk_prompt": "at respond to the multiple-independent question about {} by selecting the inappropriate question: 'A', 'B', 'C', or 'D'. ",
149
+ "transfer_ori_acc": 53.36879432624113,
150
+ "transfer_atk_acc": 14.893617021276595
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 65.5,
156
+ "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 52.5,
158
+ "atk_prompt": "all your ability with {}, attend to the following multiple-select question by picking 'A', 'B', 'C', or 'D'.",
159
+ "transfer_ori_acc": 54.43262411347518,
160
+ "transfer_atk_acc": 54.07801418439716
161
+ },
162
+ {
163
+ "dataset": "squad_v2",
164
+ "type": "task",
165
+ "origin_acc": 72.25,
166
+ "origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, reply with 'unanswerable'.",
167
+ "atk_acc": 65.81,
168
+ "atk_prompt": "Based on the use context, provide the best possible answer. who there'tis no answer provided in the area, reply with 'unanswerable'.",
169
+ "transfer_ori_acc": 90.04896616541353,
170
+ "transfer_atk_acc": 90.54896616541353
171
+ },
172
+ {
173
+ "dataset": "squad_v2",
174
+ "type": "role",
175
+ "origin_acc": 72.45,
176
+ "origin_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable'.",
177
+ "atk_acc": 61.02,
178
+ "atk_prompt": "As a proficient expert in the given context, try for the most specific out. If the question lacks be search, end by as 'unanswerable'.",
179
+ "transfer_ori_acc": 90.54896616541353,
180
+ "transfer_atk_acc": 90.55717634603751
181
+ },
182
+ {
183
+ "dataset": "iwslt",
184
+ "type": "task",
185
+ "origin_acc": 0.24,
186
+ "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.",
187
+ "atk_acc": 0.21,
188
+ "atk_prompt": "Convert the following sentence from its actual {} language to the net language {}. ",
189
+ "transfer_ori_acc": 0.23981220411098694,
190
+ "transfer_atk_acc": 0.22991007113697723
191
+ },
192
+ {
193
+ "dataset": "iwslt",
194
+ "type": "role",
195
+ "origin_acc": 0.24,
196
+ "origin_prompt": "Assuming the responsibilities of a professional translator, translate the subsequent text passage from {} to {}.",
197
+ "atk_acc": 0.17,
198
+ "atk_prompt": "what the responsibilities of a professional scholar, translate the subsequent text passage from {} to {}. ",
199
+ "transfer_ori_acc": 0.2676477143562027,
200
+ "transfer_atk_acc": 0.23524703512046088
201
+ },
202
+ {
203
+ "dataset": "un_multi",
204
+ "type": "task",
205
+ "origin_acc": 0.36,
206
+ "origin_prompt": "Translate the following text from the source language {} to the target language {}.",
207
+ "atk_acc": 0.35,
208
+ "atk_prompt": "Translate the following area from the standard language {} to the target language {}. ",
209
+ "transfer_ori_acc": 0.35343571054545636,
210
+ "transfer_atk_acc": 0.3527277831126269
211
+ },
212
+ {
213
+ "dataset": "un_multi",
214
+ "type": "role",
215
+ "origin_acc": 0.35,
216
+ "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}.",
217
+ "atk_acc": 0.26,
218
+ "atk_prompt": "As a qualified proper, converts the target sentence from its original {} language to the english language {}. ",
219
+ "transfer_ori_acc": 0.35395992027011125,
220
+ "transfer_atk_acc": 0.35538909869030366
221
+ },
222
+ {
223
+ "dataset": "math",
224
+ "type": "task",
225
+ "origin_acc": 22.5,
226
+ "origin_prompt": "Compute the result of this math task concerning {}.",
227
+ "atk_acc": 3.12,
228
+ "atk_prompt": "Compute the degree of this math task concerning {}. ",
229
+ "transfer_ori_acc": 16.25,
230
+ "transfer_atk_acc": 14.374999999999998
231
+ },
232
+ {
233
+ "dataset": "math",
234
+ "type": "role",
235
+ "origin_acc": 22.5,
236
+ "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}.",
237
+ "atk_acc": 7.5,
238
+ "atk_prompt": "as a mathematics instructor, evaluate the reply to the following problem and to {}. ",
239
+ "transfer_ori_acc": 15.625,
240
+ "transfer_atk_acc": 15.0
241
+ }
242
+ ]
results_transfer/chatgpt_google-flan-ul2/checklist_0_shot.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 78.5,
6
+ "origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable':",
7
+ "atk_acc": 72.5,
8
+ "atk_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable' zexKUTWJYG:",
9
+ "transfer_ori_acc": 86.0,
10
+ "transfer_atk_acc": 84.7
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 79.0,
16
+ "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:",
17
+ "atk_acc": 45.0,
18
+ "atk_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar zexKUTWJYG:",
19
+ "transfer_ori_acc": 86.0,
20
+ "transfer_atk_acc": 82.19999999999999
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 75.5,
26
+ "origin_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 72.0,
28
+ "atk_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent' SvAp8RlOFn. ",
29
+ "transfer_ori_acc": 86.27450980392157,
30
+ "transfer_atk_acc": 85.7843137254902
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 75.5,
36
+ "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 73.5,
38
+ "atk_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent' Kw6nrs57gH. ",
39
+ "transfer_ori_acc": 83.33333333333334,
40
+ "transfer_atk_acc": 83.82352941176471
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 78.0,
46
+ "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 75.5,
48
+ "atk_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment' ItGDrrA1Sc. ",
49
+ "transfer_ori_acc": 93.89999999999999,
50
+ "transfer_atk_acc": 94.39999999999999
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 78.0,
56
+ "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 74.5,
58
+ "atk_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment' 6a4Yn3RGVc. ",
59
+ "transfer_ori_acc": 94.69999999999999,
60
+ "transfer_atk_acc": 94.3
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 81.5,
66
+ "origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 78.0,
68
+ "atk_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent' iia2yL9Uzm. ",
69
+ "transfer_ori_acc": 87.7,
70
+ "transfer_atk_acc": 88.4
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 83.0,
76
+ "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.",
77
+ "atk_acc": 80.5,
78
+ "atk_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions Cev0s49fbe. ",
79
+ "transfer_ori_acc": 88.3,
80
+ "transfer_atk_acc": 88.5
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 82.5,
86
+ "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.",
87
+ "atk_acc": 80.0,
88
+ "atk_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment' MSAddJ4D2a.",
89
+ "transfer_ori_acc": 93.50180505415162,
90
+ "transfer_atk_acc": 93.50180505415162
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 81.5,
96
+ "origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':",
97
+ "atk_acc": 80.0,
98
+ "atk_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment' 6a4Yn3RGVc:",
99
+ "transfer_ori_acc": 93.50180505415162,
100
+ "transfer_atk_acc": 94.22382671480143
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 93.0,
106
+ "origin_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative':",
107
+ "atk_acc": 90.0,
108
+ "atk_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative' AMsRIKZniY: ",
109
+ "transfer_ori_acc": 96.67431192660551,
110
+ "transfer_atk_acc": 96.3302752293578
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 92.5,
116
+ "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:",
117
+ "atk_acc": 92.0,
118
+ "atk_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify Cev0s49fbe: ",
119
+ "transfer_ori_acc": 95.9862385321101,
120
+ "transfer_atk_acc": 96.3302752293578
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 64.79,
126
+ "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.",
127
+ "atk_acc": 59.15,
128
+ "atk_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word zexKUTWJYG. ",
129
+ "transfer_ori_acc": 78.87323943661971,
130
+ "transfer_atk_acc": 78.87323943661971
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 63.38,
136
+ "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':",
137
+ "atk_acc": 57.75,
138
+ "atk_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment' 5XeflW1ZJc:",
139
+ "transfer_ori_acc": 77.46478873239437,
140
+ "transfer_atk_acc": 78.87323943661971
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 64.0,
146
+ "origin_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 58.0,
148
+ "atk_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D' Pbl3tYuoRo. ",
149
+ "transfer_ori_acc": 52.304964539007095,
150
+ "transfer_atk_acc": 52.4822695035461
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 61.5,
156
+ "origin_prompt": "As a skilled person in the field of {}, please respond to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 58.0,
158
+ "atk_prompt": "As a skilled person in the field of {}, please respond to the multiple-choice question by choosing 'A', 'B', 'C', or 'D' IhuBIhoPGc.",
159
+ "transfer_ori_acc": 52.4822695035461,
160
+ "transfer_atk_acc": 53.72340425531915
161
+ },
162
+ {
163
+ "dataset": "squad_v2",
164
+ "type": "task",
165
+ "origin_acc": 69.16,
166
+ "origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, respond with 'unanswerable'.",
167
+ "atk_acc": 64.99,
168
+ "atk_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, respond with 'unanswerable' ItGDrrA1Sc.",
169
+ "transfer_ori_acc": 89.38229949874687,
170
+ "transfer_atk_acc": 89.31979949874687
171
+ },
172
+ {
173
+ "dataset": "squad_v2",
174
+ "type": "role",
175
+ "origin_acc": 64.31,
176
+ "origin_prompt": "As an expert with a deep understanding of the context, find the best answer. If the context doesn't include an answer, say 'unanswerable'.",
177
+ "atk_acc": 60.2,
178
+ "atk_prompt": "As an expert with a deep understanding of the context, find the best answer. If the context doesn't include an answer, say 'unanswerable' d6ZQ3u0GBQ.",
179
+ "transfer_ori_acc": 89.44479949874687,
180
+ "transfer_atk_acc": 89.81979949874687
181
+ },
182
+ {
183
+ "dataset": "iwslt",
184
+ "type": "task",
185
+ "origin_acc": 0.22,
186
+ "origin_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context:",
187
+ "atk_acc": 0.21,
188
+ "atk_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context zq0DcZ5dnI:",
189
+ "transfer_ori_acc": 0.1697518407093131,
190
+ "transfer_atk_acc": 0.17419805969910362
191
+ },
192
+ {
193
+ "dataset": "iwslt",
194
+ "type": "role",
195
+ "origin_acc": 0.22,
196
+ "origin_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to {}:",
197
+ "atk_acc": 0.21,
198
+ "atk_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to SFPCqpiClT {}: ",
199
+ "transfer_ori_acc": 0.17223574608131062,
200
+ "transfer_atk_acc": 0.16640042157073662
201
+ },
202
+ {
203
+ "dataset": "un_multi",
204
+ "type": "task",
205
+ "origin_acc": 0.34,
206
+ "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}:",
207
+ "atk_acc": 0.33,
208
+ "atk_prompt": "Perform a machine translation for the provided sentence, changing it from {} to Kw6nrs57gH {}:",
209
+ "transfer_ori_acc": 0.3102455120610509,
210
+ "transfer_atk_acc": 0.30087524214938727
211
+ },
212
+ {
213
+ "dataset": "un_multi",
214
+ "type": "role",
215
+ "origin_acc": 0.34,
216
+ "origin_prompt": "While acting as an experienced language expert, translate the following text from {} to {}:",
217
+ "atk_acc": 0.33,
218
+ "atk_prompt": "While acting as an experienced language expert, translate the following text from {} to Kw6nrs57gH {}: ",
219
+ "transfer_ori_acc": 0.3133010160127483,
220
+ "transfer_atk_acc": 0.2966985644521811
221
+ },
222
+ {
223
+ "dataset": "math",
224
+ "type": "task",
225
+ "origin_acc": 6.88,
226
+ "origin_prompt": "Calculate the answer to the following math query about {}:",
227
+ "atk_acc": 5.0,
228
+ "atk_prompt": "Calculate the answer to the following math query about 6a4Yn3RGVc {}:",
229
+ "transfer_ori_acc": 14.374999999999998,
230
+ "transfer_atk_acc": 13.750000000000002
231
+ },
232
+ {
233
+ "dataset": "math",
234
+ "type": "role",
235
+ "origin_acc": 5.0,
236
+ "origin_prompt": "Embodying a mathematics advisor, find the answer to the following math problem with {}:",
237
+ "atk_acc": 2.5,
238
+ "atk_prompt": "Embodying a mathematics advisor, find the answer to the following math problem with 9vGXgnbWB8 {}: ",
239
+ "transfer_ori_acc": 13.125,
240
+ "transfer_atk_acc": 12.5
241
+ }
242
+ ]
results_transfer/chatgpt_google-flan-ul2/checklist_3_shot.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 81.0,
6
+ "origin_prompt": "Examine the sentence and decide if its grammar is 'Acceptable' or 'Unacceptable'.",
7
+ "atk_acc": 78.0,
8
+ "atk_prompt": "Examine the sentence and decide if its grammar is 'Acceptable' or 'Unacceptable' 9Q3S1F94fE. ",
9
+ "transfer_ori_acc": 85.9,
10
+ "transfer_atk_acc": 86.8
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 81.5,
16
+ "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable'.",
17
+ "atk_acc": 79.5,
18
+ "atk_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable' 6a4Yn3RGVc. ",
19
+ "transfer_ori_acc": 86.4,
20
+ "transfer_atk_acc": 86.8
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 74.0,
26
+ "origin_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 68.5,
28
+ "atk_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent' LkF0FZxMZ4. ",
29
+ "transfer_ori_acc": 84.06862745098039,
30
+ "transfer_atk_acc": 84.06862745098039
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 72.0,
36
+ "origin_prompt": "As a linguistic comparator, review the following pair of sentences and determine their semantic equivalence by choosing 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 68.0,
38
+ "atk_prompt": "As a linguistic comparator, review the following pair of sentences and determine their semantic equivalence by choosing 'equivalent' or 'not_equivalent' bhrRSokrfa. ",
39
+ "transfer_ori_acc": 85.5392156862745,
40
+ "transfer_atk_acc": 85.7843137254902
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 84.5,
46
+ "origin_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 80.5,
48
+ "atk_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment' 9Q3S1F94fE. ",
49
+ "transfer_ori_acc": 93.5,
50
+ "transfer_atk_acc": 93.2
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 84.5,
56
+ "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 81.0,
58
+ "atk_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment' YHQiDfFhw4. ",
59
+ "transfer_ori_acc": 93.0,
60
+ "transfer_atk_acc": 93.0
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 82.0,
66
+ "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 79.0,
68
+ "atk_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent' xikCjkMydH. ",
69
+ "transfer_ori_acc": 87.6,
70
+ "transfer_atk_acc": 87.7
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 83.0,
76
+ "origin_prompt": "In the capacity of a question assessment system, indicate if the meaning of the provided questions is the same, responding with 'equivalent' or 'not_equivalent'.",
77
+ "atk_acc": 81.5,
78
+ "atk_prompt": "In the capacity of a question assessment system, indicate if the meaning of the provided questions is the same, responding with 'equivalent' or 'not_equivalent' 5XeflW1ZJc. ",
79
+ "transfer_ori_acc": 88.7,
80
+ "transfer_atk_acc": 89.1
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 84.5,
86
+ "origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.",
87
+ "atk_acc": 80.5,
88
+ "atk_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment' 5XeflW1ZJc. ",
89
+ "transfer_ori_acc": 92.7797833935018,
90
+ "transfer_atk_acc": 91.69675090252709
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 84.0,
96
+ "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.",
97
+ "atk_acc": 80.5,
98
+ "atk_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment' bhrRSokrfa. ",
99
+ "transfer_ori_acc": 92.4187725631769,
100
+ "transfer_atk_acc": 93.50180505415162
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 96.0,
106
+ "origin_prompt": "Read the provided excerpt and choose between 'positive' and 'negative' to describe its sentiment.",
107
+ "atk_acc": 77.5,
108
+ "atk_prompt": "go the read excerpt and be between 'positive' and 'negative' to describing its mood. ",
109
+ "transfer_ori_acc": 96.67431192660551,
110
+ "transfer_atk_acc": 96.78899082568807
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 97.5,
116
+ "origin_prompt": "In the capacity of a sentiment classifier, decide whether the given quote is 'positive' or 'negative'.",
117
+ "atk_acc": 13.5,
118
+ "atk_prompt": "in the capacity of a sentiment classizer, chose neither the said quotes is 'positive' or 'negative'. ",
119
+ "transfer_ori_acc": 96.44495412844036,
120
+ "transfer_atk_acc": 91.97247706422019
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 69.01,
126
+ "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 61.97,
128
+ "atk_prompt": "Indicate if the connection between the followping senZences is 'entailment' or 'not_entailment'. ",
129
+ "transfer_ori_acc": 76.05633802816901,
130
+ "transfer_atk_acc": 76.05633802816901
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 66.2,
136
+ "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.",
137
+ "atk_acc": 57.75,
138
+ "atk_prompt": "In your role as an entailment analysis tool, asress the rlationship between the given sentences and classify it as 'entailment' or 'not_entailment'. ",
139
+ "transfer_ori_acc": 77.46478873239437,
140
+ "transfer_atk_acc": 74.64788732394366
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 66.0,
146
+ "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 63.0,
148
+ "atk_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D' MQnugHcaoy. ",
149
+ "transfer_ori_acc": 53.36879432624113,
150
+ "transfer_atk_acc": 53.01418439716312
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 65.5,
156
+ "origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 63.0,
158
+ "atk_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D' mOJ9nEwT4f.",
159
+ "transfer_ori_acc": 53.01418439716312,
160
+ "transfer_atk_acc": 53.72340425531915
161
+ },
162
+ {
163
+ "dataset": "squad_v2",
164
+ "type": "task",
165
+ "origin_acc": 73.25,
166
+ "origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, reply with 'unanswerable'.",
167
+ "atk_acc": 71.09,
168
+ "atk_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, reply with 'unanswerable' ItGDrrA1Sc.",
169
+ "transfer_ori_acc": 90.04896616541353,
170
+ "transfer_atk_acc": 90.54896616541353
171
+ },
172
+ {
173
+ "dataset": "squad_v2",
174
+ "type": "role",
175
+ "origin_acc": 72.63,
176
+ "origin_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable'.",
177
+ "atk_acc": 63.07,
178
+ "atk_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable' XStsD2pUzu.",
179
+ "transfer_ori_acc": 90.54896616541353,
180
+ "transfer_atk_acc": 90.04896616541353
181
+ },
182
+ {
183
+ "dataset": "iwslt",
184
+ "type": "task",
185
+ "origin_acc": 0.24,
186
+ "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.",
187
+ "atk_acc": 0.22,
188
+ "atk_prompt": "Convert the following sentence from its original {} language to the target language 5XeflW1ZJc {}. ",
189
+ "transfer_ori_acc": 0.23981220411098694,
190
+ "transfer_atk_acc": 0.19533148180136461
191
+ },
192
+ {
193
+ "dataset": "iwslt",
194
+ "type": "role",
195
+ "origin_acc": 0.24,
196
+ "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}.",
197
+ "atk_acc": 0.23,
198
+ "atk_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to MQnugHcaoy {}. ",
199
+ "transfer_ori_acc": 0.236844571841243,
200
+ "transfer_atk_acc": 0.22997780997943829
201
+ },
202
+ {
203
+ "dataset": "un_multi",
204
+ "type": "task",
205
+ "origin_acc": 0.35,
206
+ "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}.",
207
+ "atk_acc": 0.34,
208
+ "atk_prompt": "Perform a machine translation for the provided sentence, changing it from {} to Kw6nrs57gH {}. ",
209
+ "transfer_ori_acc": 0.3589520956246615,
210
+ "transfer_atk_acc": 0.3536481319882283
211
+ },
212
+ {
213
+ "dataset": "un_multi",
214
+ "type": "role",
215
+ "origin_acc": 0.35,
216
+ "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}.",
217
+ "atk_acc": 0.31,
218
+ "atk_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language 5XeflW1ZJc {}. ",
219
+ "transfer_ori_acc": 0.35395992027011125,
220
+ "transfer_atk_acc": 0.35277034079178615
221
+ },
222
+ {
223
+ "dataset": "math",
224
+ "type": "task",
225
+ "origin_acc": 23.12,
226
+ "origin_prompt": "Calculate the answer to the following math query about {}.",
227
+ "atk_acc": 19.38,
228
+ "atk_prompt": "Calculate the answer to the following math query about 5XeflW1ZJc {}. ",
229
+ "transfer_ori_acc": 16.25,
230
+ "transfer_atk_acc": 15.625
231
+ },
232
+ {
233
+ "dataset": "math",
234
+ "type": "role",
235
+ "origin_acc": 22.5,
236
+ "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}.",
237
+ "atk_acc": 21.25,
238
+ "atk_prompt": "As a mathematics instructor, calculate the answer to the following problem related to AMsRIKZniY {}. ",
239
+ "transfer_ori_acc": 15.625,
240
+ "transfer_atk_acc": 15.0
241
+ }
242
+ ]
results_transfer/chatgpt_google-flan-ul2/deepwordbug_0_shot.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 79.0,
6
+ "origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable':",
7
+ "atk_acc": 74.5,
8
+ "atk_prompt": "Review the sentece below and identify whether its grammar is 'Acceptable' or 'Unacceptable':",
9
+ "transfer_ori_acc": 86.0,
10
+ "transfer_atk_acc": 86.3
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 79.5,
16
+ "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:",
17
+ "atk_acc": 0.0,
18
+ "atk_prompt": "nhile performing grammr analysis, classify the rgammar of the fllowing sentenZe as 'acceptable' for corFect Xrammar or 'unacceptable' for incorrect gramUar:",
19
+ "transfer_ori_acc": 86.0,
20
+ "transfer_atk_acc": 84.39999999999999
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 74.0,
26
+ "origin_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 70.0,
28
+ "atk_prompt": "Evaluate whether the two provRided sentnces convey the same meaning by answering 'equivalent' or 'not_equivalent'. ",
29
+ "transfer_ori_acc": 86.27450980392157,
30
+ "transfer_atk_acc": 84.06862745098039
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 76.0,
36
+ "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 53.0,
38
+ "atk_prompt": "nI your capacity as a sematnic asseJssment tol, evaluate the prvoided senRtences and classify their meainngs as 'equivalent' or 'not_equivalent'. ",
39
+ "transfer_ori_acc": 83.33333333333334,
40
+ "transfer_atk_acc": 82.84313725490196
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 78.0,
46
+ "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 50.0,
48
+ "atk_prompt": "Exaluate whetxher the iven contet Supports the anwser to the queston by respEonding with 'entailment' or 'not_entailment'. ",
49
+ "transfer_ori_acc": 93.89999999999999,
50
+ "transfer_atk_acc": 94.39999999999999
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 79.5,
56
+ "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 19.5,
58
+ "atk_prompt": "As a textula inference expert, aInalyze if the answer to the question can be deducud from the provided context and Melect 'entailment' or 'not_entailment'. ",
59
+ "transfer_ori_acc": 93.30000000000001,
60
+ "transfer_atk_acc": 93.2
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 82.0,
66
+ "origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 45.0,
68
+ "atk_prompt": "Assess whethdr the followng sLtatements are identocal in meuaning by answerinJ 'equivalent' or 'not_equivalent'. ",
69
+ "transfer_ori_acc": 87.7,
70
+ "transfer_atk_acc": 87.6
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 82.5,
76
+ "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.",
77
+ "atk_acc": 79.5,
78
+ "atk_prompt": "Ao an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions. ",
79
+ "transfer_ori_acc": 88.3,
80
+ "transfer_atk_acc": 87.9
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 83.5,
86
+ "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.",
87
+ "atk_acc": 79.0,
88
+ "atk_prompt": "ArYe the folowing two sentenes entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word. ",
89
+ "transfer_ori_acc": 94.22382671480143,
90
+ "transfer_atk_acc": 94.22382671480143
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 80.5,
96
+ "origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':",
97
+ "atk_acc": 78.5,
98
+ "atk_prompt": "IAs an instrument for entailment evaluatioRn, consider the otwo sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':",
99
+ "transfer_ori_acc": 93.50180505415162,
100
+ "transfer_atk_acc": 93.86281588447653
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 93.0,
106
+ "origin_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative':",
107
+ "atk_acc": 88.5,
108
+ "atk_prompt": "Deetrmine the overall sentLment of this sentence, categorizing it as 'positive' or 'negative': ",
109
+ "transfer_ori_acc": 96.67431192660551,
110
+ "transfer_atk_acc": 96.55963302752293
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 93.0,
116
+ "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:",
117
+ "atk_acc": 85.0,
118
+ "atk_prompt": "uFnctioning as a sentmient identification tool, assess if the folGowing expression is 'positive' or 'negative'. Classify: ",
119
+ "transfer_ori_acc": 95.9862385321101,
120
+ "transfer_atk_acc": 95.52752293577981
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 66.2,
126
+ "origin_prompt": "Examine the pair of sentences and determine if they exhibit entailment or not_entailment. Answer with either 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 53.52,
128
+ "atk_prompt": "Examine the Bair of snetences and determine if they exhibit entailment or not_entailment. Ancwer with eMther 'entailment' or 'not_entailment'.",
129
+ "transfer_ori_acc": 78.87323943661971,
130
+ "transfer_atk_acc": 77.46478873239437
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 63.38,
136
+ "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':",
137
+ "atk_acc": 53.52,
138
+ "atk_prompt": "IQn the role of a semantic relationship analyzer, eamine the connection between the given sentences and decide if they exhbit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':",
139
+ "transfer_ori_acc": 77.46478873239437,
140
+ "transfer_atk_acc": 77.46478873239437
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 62.5,
146
+ "origin_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 60.0,
148
+ "atk_prompt": "Regarding the following multiple-choice question on {}, pick the correct answeWr from the options 'A', 'B', 'C', or 'D'. ",
149
+ "transfer_ori_acc": 52.304964539007095,
150
+ "transfer_atk_acc": 52.4822695035461
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 62.5,
156
+ "origin_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 59.0,
158
+ "atk_prompt": "With your knowledge of {}, tackle the follQwing multiple-choice question by choosing 'A', 'B', 'C', or 'D'.",
159
+ "transfer_ori_acc": 52.12765957446809,
160
+ "transfer_atk_acc": 53.191489361702125
161
+ },
162
+ {
163
+ "dataset": "squad_v2",
164
+ "type": "task",
165
+ "origin_acc": 71.87,
166
+ "origin_prompt": "Discover the best answer based on the context. If the context doesn't include an answer, respond with 'unanswerable'.",
167
+ "atk_acc": 56.61,
168
+ "atk_prompt": "DiscoveMr the best answr bassd on the Rontext. Iof the cnotext doesn't include an answer, resCpond with 'unanswerable'.",
169
+ "transfer_ori_acc": 89.38229949874687,
170
+ "transfer_atk_acc": 90.38229949874687
171
+ },
172
+ {
173
+ "dataset": "squad_v2",
174
+ "type": "role",
175
+ "origin_acc": 64.5,
176
+ "origin_prompt": "As an expert with a deep understanding of the context, find the best answer. If the context doesn't include an answer, say 'unanswerable'.",
177
+ "atk_acc": 36.52,
178
+ "atk_prompt": "zs an exprt with a deep understanding of the context, find the best answr. If the context doesn't include an anCswer, asy 'unanswerable'.",
179
+ "transfer_ori_acc": 89.44479949874687,
180
+ "transfer_atk_acc": 89.64896616541355
181
+ },
182
+ {
183
+ "dataset": "iwslt",
184
+ "type": "task",
185
+ "origin_acc": 0.22,
186
+ "origin_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context:",
187
+ "atk_acc": 0.21,
188
+ "atk_prompt": "Translate the provideMd sentence from {} to {} while mainOaining the original meaning and context:",
189
+ "transfer_ori_acc": 0.1697518407093131,
190
+ "transfer_atk_acc": 0.1715931236169142
191
+ },
192
+ {
193
+ "dataset": "iwslt",
194
+ "type": "role",
195
+ "origin_acc": 0.22,
196
+ "origin_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to {}:",
197
+ "atk_acc": 0.21,
198
+ "atk_prompt": "In the capacity of a proficient tanslator, plnase change the following senlence from {} to {}: ",
199
+ "transfer_ori_acc": 0.17223574608131062,
200
+ "transfer_atk_acc": 0.17294967708177186
201
+ },
202
+ {
203
+ "dataset": "un_multi",
204
+ "type": "task",
205
+ "origin_acc": 0.34,
206
+ "origin_prompt": "Translate the following text from the source language {} to the target language {}:",
207
+ "atk_acc": 0.33,
208
+ "atk_prompt": "Translate the following text from the source language {} to the Ptarget langugae {}:",
209
+ "transfer_ori_acc": 0.30893486445367974,
210
+ "transfer_atk_acc": 0.3118511970354398
211
+ },
212
+ {
213
+ "dataset": "un_multi",
214
+ "type": "role",
215
+ "origin_acc": 0.34,
216
+ "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}:",
217
+ "atk_acc": 0.32,
218
+ "atk_prompt": "In the role of a language expert, pErform a macRhine yranslation for the provided sentence, cZanging it from {} to {}: ",
219
+ "transfer_ori_acc": 0.31256243540663714,
220
+ "transfer_atk_acc": 0.31148261863602605
221
+ },
222
+ {
223
+ "dataset": "math",
224
+ "type": "task",
225
+ "origin_acc": 6.88,
226
+ "origin_prompt": "Resolve the following mathematical question associated with {}:",
227
+ "atk_acc": 5.62,
228
+ "atk_prompt": "Resolve the following mathematical quXestion assocated with {}:",
229
+ "transfer_ori_acc": 14.374999999999998,
230
+ "transfer_atk_acc": 14.374999999999998
231
+ },
232
+ {
233
+ "dataset": "math",
234
+ "type": "role",
235
+ "origin_acc": 5.0,
236
+ "origin_prompt": "Embodying a mathematics advisor, find the answer to the following math problem with {}:",
237
+ "atk_acc": 2.5,
238
+ "atk_prompt": "Embodying a mathematics advisor, ind the anwer to the fpollowing math problem with {}: ",
239
+ "transfer_ori_acc": 13.125,
240
+ "transfer_atk_acc": 11.875
241
+ }
242
+ ]
results_transfer/chatgpt_google-flan-ul2/deepwordbug_3_shot.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 80.5,
6
+ "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable'.",
7
+ "atk_acc": 78.5,
8
+ "atk_prompt": "Examine the given sentence and decide if it is gramatically sound. nAswer with either 'Acceptable' or 'Unacceptable'. ",
9
+ "transfer_ori_acc": 86.2,
10
+ "transfer_atk_acc": 86.3
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 81.5,
16
+ "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable'.",
17
+ "atk_acc": 78.5,
18
+ "atk_prompt": "Funtioning as a graMmar evaluation tool, analyzY the given sentence and decide if it is grammatically corLect, responding with 'acceptable' or 'unacceptable'. ",
19
+ "transfer_ori_acc": 86.4,
20
+ "transfer_atk_acc": 87.1
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 74.5,
26
+ "origin_prompt": "Examine the pair of sentences and decide if their meanings are identical by answering with 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 65.5,
28
+ "atk_prompt": "Examine the Kair of sentences and edcide if their meanijngs are ibentical by answering with 'equivalent' or 'not_equivalent'. ",
29
+ "transfer_ori_acc": 85.5392156862745,
30
+ "transfer_atk_acc": 85.29411764705883
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 72.0,
36
+ "origin_prompt": "As a linguistic comparator, review the following pair of sentences and determine their semantic equivalence by choosing 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 66.5,
38
+ "atk_prompt": "AV a linguistic comaprator, review the following pair of sentences and determine their semayntic equivalence by chhoosing 'equivalent' or 'not_equivalent'. ",
39
+ "transfer_ori_acc": 85.5392156862745,
40
+ "transfer_atk_acc": 85.7843137254902
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 85.0,
46
+ "origin_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 77.5,
48
+ "atk_prompt": "Examine the contedt and question, and determine if the cXontext logically implies the answer by selecting 'entailment' or 'not_entailment'. ",
49
+ "transfer_ori_acc": 93.5,
50
+ "transfer_atk_acc": 93.89999999999999
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 86.0,
56
+ "origin_prompt": "As a textual analyst, examine if the given context logically implies the answer to the question and indicate your decision with 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 75.0,
58
+ "atk_prompt": "A a textual analyst, xamine if the gPven conSext logiclaly implies the asnwer to the qbuestion and indicate your decision with 'entailment' or 'not_entailment'. ",
59
+ "transfer_ori_acc": 93.2,
60
+ "transfer_atk_acc": 93.60000000000001
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 82.0,
66
+ "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 80.0,
68
+ "atk_prompt": "Can these two statements be considJred equal in meaning? Answer with 'equivalent' or 'not_equivalent'. ",
69
+ "transfer_ori_acc": 87.6,
70
+ "transfer_atk_acc": 87.8
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 83.0,
76
+ "origin_prompt": "Acting as a question equivalence instrument, determine if the provided questions are equivalent in meaning, answering with 'equivalent' for similar questions or 'not_equivalent' for dissimilar ones.",
77
+ "atk_acc": 81.5,
78
+ "atk_prompt": "Actinm as a question equivalence instrumetn, determine if the provided questions are equivalent in meaning, answering with 'equivalent' for similar questions or 'not_equivalent' for dissimilar ones. ",
79
+ "transfer_ori_acc": 88.6,
80
+ "transfer_atk_acc": 88.4
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 85.0,
86
+ "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.",
87
+ "atk_acc": 80.5,
88
+ "atk_prompt": "Does the relationshBp between the given sentences repersent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'. ",
89
+ "transfer_ori_acc": 93.86281588447653,
90
+ "transfer_atk_acc": 93.50180505415162
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 84.5,
96
+ "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.",
97
+ "atk_acc": 81.0,
98
+ "atk_prompt": "In your role as an entailment analysis toIol, aZssess the relationship between the given sentences and cassify it as 'entailment' or 'not_entailment'. ",
99
+ "transfer_ori_acc": 92.4187725631769,
100
+ "transfer_atk_acc": 93.14079422382672
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 97.0,
106
+ "origin_prompt": "Evaluate the sentiment of the given text and classify it as 'positive' or 'negative'.",
107
+ "atk_acc": 94.0,
108
+ "atk_prompt": "ENvaluate the senttment of the giOven text and classify it as 'positive' or 'negative'. ",
109
+ "transfer_ori_acc": 96.78899082568807,
110
+ "transfer_atk_acc": 96.67431192660551
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 97.0,
116
+ "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'.",
117
+ "atk_acc": 94.0,
118
+ "atk_prompt": "qAs a setniment classifier, determnne whether the following text is 'positive' or 'negative'. ",
119
+ "transfer_ori_acc": 96.67431192660551,
120
+ "transfer_atk_acc": 96.67431192660551
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 67.61,
126
+ "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 64.79,
128
+ "atk_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ",
129
+ "transfer_ori_acc": 76.05633802816901,
130
+ "transfer_atk_acc": 74.64788732394366
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 64.79,
136
+ "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.",
137
+ "atk_acc": 59.15,
138
+ "atk_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ",
139
+ "transfer_ori_acc": 77.46478873239437,
140
+ "transfer_atk_acc": 74.64788732394366
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 66.5,
146
+ "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 63.5,
148
+ "atk_prompt": "ISn ralation to the mulitple-choice question on {}, pleare provide the accurate answer by choosing 'A', 'B', 'C', or 'D'. ",
149
+ "transfer_ori_acc": 53.54609929078015,
150
+ "transfer_atk_acc": 52.836879432624116
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 65.5,
156
+ "origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 62.5,
158
+ "atk_prompt": "As someone well-versed in {}, please address the mulZtiple-choice question below by selecting 'A', 'B', 'C', or 'D'.",
159
+ "transfer_ori_acc": 53.01418439716312,
160
+ "transfer_atk_acc": 53.36879432624113
161
+ },
162
+ {
163
+ "dataset": "squad_v2",
164
+ "type": "task",
165
+ "origin_acc": 73.25,
166
+ "origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, reply with 'unanswerable'.",
167
+ "atk_acc": 67.18,
168
+ "atk_prompt": "Based on the goiven context, provide the best possible answer. IL there's no answer available in the coYtext, reply with 'unanswerable'.",
169
+ "transfer_ori_acc": 90.04896616541353,
170
+ "transfer_atk_acc": 90.54896616541353
171
+ },
172
+ {
173
+ "dataset": "squad_v2",
174
+ "type": "role",
175
+ "origin_acc": 72.68,
176
+ "origin_prompt": "As a subject matter expert, extract the correct answer from the context. If an answer is not present, indicate 'unanswerable'.",
177
+ "atk_acc": 66.77,
178
+ "atk_prompt": "s a subject matter eAxpert, extract the correc answer from the context. If an answer is not presnt, idicate 'unanswerable'.",
179
+ "transfer_ori_acc": 89.54896616541353,
180
+ "transfer_atk_acc": 90.04896616541353
181
+ },
182
+ {
183
+ "dataset": "iwslt",
184
+ "type": "task",
185
+ "origin_acc": 0.23,
186
+ "origin_prompt": "Given the sentence below, perform a machine translation from {} to {}.",
187
+ "atk_acc": 0.23,
188
+ "atk_prompt": "Given the sentence below, perform a mRachine traCslation from {} to {}. ",
189
+ "transfer_ori_acc": 0.23710409171247415,
190
+ "transfer_atk_acc": 0.23480177292913945
191
+ },
192
+ {
193
+ "dataset": "iwslt",
194
+ "type": "role",
195
+ "origin_acc": 0.24,
196
+ "origin_prompt": "Assuming the responsibilities of a professional translator, translate the subsequent text passage from {} to {}.",
197
+ "atk_acc": 0.23,
198
+ "atk_prompt": "Assuming the responsibilities of a professional transaltor, translate the subsequent text passage from {} to {}. ",
199
+ "transfer_ori_acc": 0.2676477143562027,
200
+ "transfer_atk_acc": 0.23356148320550005
201
+ },
202
+ {
203
+ "dataset": "un_multi",
204
+ "type": "task",
205
+ "origin_acc": 0.35,
206
+ "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}.",
207
+ "atk_acc": 0.26,
208
+ "atk_prompt": "Plese erform a translation of the Fiven sentence, converwing it from {} to {}. ",
209
+ "transfer_ori_acc": 0.35614481480027904,
210
+ "transfer_atk_acc": 0.35436612463392214
211
+ },
212
+ {
213
+ "dataset": "un_multi",
214
+ "type": "role",
215
+ "origin_acc": 0.35,
216
+ "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}.",
217
+ "atk_acc": 0.35,
218
+ "atk_prompt": "As a qualified translator, convert the given sentence from its original {} language to the targte language {}. ",
219
+ "transfer_ori_acc": 0.35395992027011125,
220
+ "transfer_atk_acc": 0.3516955546172276
221
+ },
222
+ {
223
+ "dataset": "math",
224
+ "type": "task",
225
+ "origin_acc": 23.12,
226
+ "origin_prompt": "Calculate the answer to the following math query about {}.",
227
+ "atk_acc": 18.75,
228
+ "atk_prompt": "Calculate the ansewr to the followiJg math query about {}. ",
229
+ "transfer_ori_acc": 16.25,
230
+ "transfer_atk_acc": 16.875
231
+ },
232
+ {
233
+ "dataset": "math",
234
+ "type": "role",
235
+ "origin_acc": 23.12,
236
+ "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}.",
237
+ "atk_acc": 20.62,
238
+ "atk_prompt": "As a mathematics insrtuctor, calculate the answer to the following problem Vrelated to {}. ",
239
+ "transfer_ori_acc": 15.625,
240
+ "transfer_atk_acc": 15.0
241
+ }
242
+ ]
results_transfer/chatgpt_google-flan-ul2/stresstest_0_shot.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 78.5,
6
+ "origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable':",
7
+ "atk_acc": 32.5,
8
+ "atk_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable' and true is true :",
9
+ "transfer_ori_acc": 86.0,
10
+ "transfer_atk_acc": 84.7
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 79.5,
16
+ "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:",
17
+ "atk_acc": 75.0,
18
+ "atk_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar and true is true and true is true and true is true and true is true and true is true :",
19
+ "transfer_ori_acc": 86.0,
20
+ "transfer_atk_acc": 35.0
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 77.5,
26
+ "origin_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 73.0,
28
+ "atk_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent' and false is not true . ",
29
+ "transfer_ori_acc": 87.00980392156863,
30
+ "transfer_atk_acc": 85.5392156862745
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 76.0,
36
+ "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 15.5,
38
+ "atk_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent' and true is true . ",
39
+ "transfer_ori_acc": 83.33333333333334,
40
+ "transfer_atk_acc": 82.84313725490196
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 78.5,
46
+ "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 74.5,
48
+ "atk_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment' and true is true . ",
49
+ "transfer_ori_acc": 93.89999999999999,
50
+ "transfer_atk_acc": 93.4
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 79.5,
56
+ "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 69.0,
58
+ "atk_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ",
59
+ "transfer_ori_acc": 93.30000000000001,
60
+ "transfer_atk_acc": 92.7
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 80.0,
66
+ "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 72.5,
68
+ "atk_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent' and false is not true . ",
69
+ "transfer_ori_acc": 87.4,
70
+ "transfer_atk_acc": 86.8
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 81.0,
76
+ "origin_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.",
77
+ "atk_acc": 80.5,
78
+ "atk_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ",
79
+ "transfer_ori_acc": 88.0,
80
+ "transfer_atk_acc": 88.3
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 83.5,
86
+ "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.",
87
+ "atk_acc": 81.5,
88
+ "atk_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word and true is true . ",
89
+ "transfer_ori_acc": 94.22382671480143,
90
+ "transfer_atk_acc": 92.4187725631769
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 81.5,
96
+ "origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':",
97
+ "atk_acc": 80.5,
98
+ "atk_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true :",
99
+ "transfer_ori_acc": 93.50180505415162,
100
+ "transfer_atk_acc": 92.4187725631769
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 91.0,
106
+ "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment:",
107
+ "atk_acc": 0.0,
108
+ "atk_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment and true is true : ",
109
+ "transfer_ori_acc": 96.55963302752293,
110
+ "transfer_atk_acc": 96.21559633027523
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 93.5,
116
+ "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:",
117
+ "atk_acc": 93.0,
118
+ "atk_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify and false is not true : ",
119
+ "transfer_ori_acc": 95.9862385321101,
120
+ "transfer_atk_acc": 91.39908256880734
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 61.97,
126
+ "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.",
127
+ "atk_acc": 63.38,
128
+ "atk_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word and true is true . ",
129
+ "transfer_ori_acc": 78.87323943661971,
130
+ "transfer_atk_acc": 77.46478873239437
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 63.38,
136
+ "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':",
137
+ "atk_acc": 59.15,
138
+ "atk_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true :",
139
+ "transfer_ori_acc": 77.46478873239437,
140
+ "transfer_atk_acc": 77.46478873239437
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 63.0,
146
+ "origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 53.5,
148
+ "atk_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D' and true is true . ",
149
+ "transfer_ori_acc": 51.77304964539007,
150
+ "transfer_atk_acc": 52.12765957446809
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 61.5,
156
+ "origin_prompt": "As a skilled person in the field of {}, please respond to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 57.0,
158
+ "atk_prompt": "As a skilled person in the field of {}, please respond to the multiple-choice question by choosing 'A', 'B', 'C', or 'D' and false is not true .",
159
+ "transfer_ori_acc": 52.4822695035461,
160
+ "transfer_atk_acc": 53.191489361702125
161
+ },
162
+ {
163
+ "dataset": "squad_v2",
164
+ "type": "task",
165
+ "origin_acc": 69.3,
166
+ "origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, respond with 'unanswerable'.",
167
+ "atk_acc": 69.88,
168
+ "atk_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, respond with 'unanswerable' and true is true .",
169
+ "transfer_ori_acc": 89.38229949874687,
170
+ "transfer_atk_acc": 88.81979949874687
171
+ },
172
+ {
173
+ "dataset": "squad_v2",
174
+ "type": "role",
175
+ "origin_acc": 64.4,
176
+ "origin_prompt": "As an expert with a deep understanding of the context, find the best answer. If the context doesn't include an answer, say 'unanswerable'.",
177
+ "atk_acc": 64.49,
178
+ "atk_prompt": "As an expert with a deep understanding of the context, find the best answer. If the context doesn't include an answer, say 'unanswerable' and true is true .",
179
+ "transfer_ori_acc": 89.44479949874687,
180
+ "transfer_atk_acc": 87.81979949874687
181
+ },
182
+ {
183
+ "dataset": "iwslt",
184
+ "type": "task",
185
+ "origin_acc": 0.22,
186
+ "origin_prompt": "Using machine translation, convert the given sentence from {} into the {} language:",
187
+ "atk_acc": 0.19,
188
+ "atk_prompt": "Using machine translation, convert the given sentence from {} into the {} language and false is not true :",
189
+ "transfer_ori_acc": 0.17087237648129655,
190
+ "transfer_atk_acc": 0.16269515463504752
191
+ },
192
+ {
193
+ "dataset": "iwslt",
194
+ "type": "role",
195
+ "origin_acc": 0.22,
196
+ "origin_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to {}:",
197
+ "atk_acc": 0.21,
198
+ "atk_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to and true is true and true is true and true is true and true is true and true is true {}: ",
199
+ "transfer_ori_acc": 0.17223574608131062,
200
+ "transfer_atk_acc": 0.14731943322573832
201
+ },
202
+ {
203
+ "dataset": "un_multi",
204
+ "type": "task",
205
+ "origin_acc": 0.34,
206
+ "origin_prompt": "Translate the subsequent text passage from its original {} language to the {} language:",
207
+ "atk_acc": 0.29,
208
+ "atk_prompt": "Translate the subsequent text passage from its original {} language to the {} language and false is not true :",
209
+ "transfer_ori_acc": 0.31391205501691966,
210
+ "transfer_atk_acc": 0.29524149996109
211
+ },
212
+ {
213
+ "dataset": "un_multi",
214
+ "type": "role",
215
+ "origin_acc": 0.34,
216
+ "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}:",
217
+ "atk_acc": 0.31,
218
+ "atk_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to and false is not true {}: ",
219
+ "transfer_ori_acc": 0.31256243540663714,
220
+ "transfer_atk_acc": 0.30881978704578533
221
+ },
222
+ {
223
+ "dataset": "math",
224
+ "type": "task",
225
+ "origin_acc": 6.25,
226
+ "origin_prompt": "Resolve the following mathematical question associated with {}:",
227
+ "atk_acc": 5.62,
228
+ "atk_prompt": "Resolve the following mathematical question associated with and true is true {}:",
229
+ "transfer_ori_acc": 14.374999999999998,
230
+ "transfer_atk_acc": 14.374999999999998
231
+ },
232
+ {
233
+ "dataset": "math",
234
+ "type": "role",
235
+ "origin_acc": 5.0,
236
+ "origin_prompt": "Embodying a mathematics advisor, find the answer to the following math problem with {}:",
237
+ "atk_acc": 3.12,
238
+ "atk_prompt": "Embodying a mathematics advisor, find the answer to the following math problem with and true is true and true is true and true is true and true is true and true is true {}: ",
239
+ "transfer_ori_acc": 13.125,
240
+ "transfer_atk_acc": 13.750000000000002
241
+ }
242
+ ]
results_transfer/chatgpt_google-flan-ul2/stresstest_3_shot.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 80.5,
6
+ "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable'.",
7
+ "atk_acc": 79.5,
8
+ "atk_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable' and true is true and true is true and true is true and true is true and true is true . ",
9
+ "transfer_ori_acc": 86.2,
10
+ "transfer_atk_acc": 85.9
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 81.5,
16
+ "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable'.",
17
+ "atk_acc": 80.5,
18
+ "atk_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable' and false is not true . ",
19
+ "transfer_ori_acc": 86.4,
20
+ "transfer_atk_acc": 86.4
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 73.5,
26
+ "origin_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 71.5,
28
+ "atk_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ",
29
+ "transfer_ori_acc": 84.06862745098039,
30
+ "transfer_atk_acc": 83.82352941176471
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 72.5,
36
+ "origin_prompt": "In the role of a textual comparison specialist, examine the given sentences and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 70.0,
38
+ "atk_prompt": "In the role of a textual comparison specialist, examine the given sentences and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent' and true is true . ",
39
+ "transfer_ori_acc": 87.00980392156863,
40
+ "transfer_atk_acc": 86.27450980392157
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 85.0,
46
+ "origin_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 79.5,
48
+ "atk_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ",
49
+ "transfer_ori_acc": 93.5,
50
+ "transfer_atk_acc": 93.89999999999999
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 84.0,
56
+ "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 81.5,
58
+ "atk_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment' and false is not true . ",
59
+ "transfer_ori_acc": 94.0,
60
+ "transfer_atk_acc": 94.3
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 81.5,
66
+ "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 81.0,
68
+ "atk_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent' and false is not true . ",
69
+ "transfer_ori_acc": 87.6,
70
+ "transfer_atk_acc": 87.6
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 83.0,
76
+ "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.",
77
+ "atk_acc": 82.0,
78
+ "atk_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions and true is true and true is true and true is true and true is true and true is true . ",
79
+ "transfer_ori_acc": 89.0,
80
+ "transfer_atk_acc": 89.3
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 85.0,
86
+ "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.",
87
+ "atk_acc": 82.0,
88
+ "atk_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment' and false is not true . ",
89
+ "transfer_ori_acc": 93.86281588447653,
90
+ "transfer_atk_acc": 93.14079422382672
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 83.0,
96
+ "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment'.",
97
+ "atk_acc": 83.0,
98
+ "atk_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment' and false is not true . ",
99
+ "transfer_ori_acc": 92.7797833935018,
100
+ "transfer_atk_acc": 92.7797833935018
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 97.0,
106
+ "origin_prompt": "Evaluate the sentiment of the given text and classify it as 'positive' or 'negative'.",
107
+ "atk_acc": 95.5,
108
+ "atk_prompt": "Evaluate the sentiment of the given text and classify it as 'positive' or 'negative' and true is true . ",
109
+ "transfer_ori_acc": 96.78899082568807,
110
+ "transfer_atk_acc": 96.78899082568807
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 97.0,
116
+ "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'.",
117
+ "atk_acc": 96.0,
118
+ "atk_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative' and false is not true . ",
119
+ "transfer_ori_acc": 96.67431192660551,
120
+ "transfer_atk_acc": 96.44495412844036
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 67.61,
126
+ "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 60.56,
128
+ "atk_prompt": "Indicate if the connection between the later sentences is 'entailment' or 'not_entailment'. ",
129
+ "transfer_ori_acc": 76.05633802816901,
130
+ "transfer_atk_acc": 76.05633802816901
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 66.2,
136
+ "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.",
137
+ "atk_acc": 59.15,
138
+ "atk_prompt": "In the role of a semantic relationship analyzer, investigated the linking between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'. ",
139
+ "transfer_ori_acc": 76.05633802816901,
140
+ "transfer_atk_acc": 74.64788732394366
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 66.5,
146
+ "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 61.5,
148
+ "atk_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D' and false is not true . ",
149
+ "transfer_ori_acc": 53.36879432624113,
150
+ "transfer_atk_acc": 51.95035460992907
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 65.5,
156
+ "origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 59.5,
158
+ "atk_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D' and false is not true .",
159
+ "transfer_ori_acc": 53.01418439716312,
160
+ "transfer_atk_acc": 51.95035460992907
161
+ },
162
+ {
163
+ "dataset": "squad_v2",
164
+ "type": "task",
165
+ "origin_acc": 73.99,
166
+ "origin_prompt": "Please derive the most fitting answer from the context. If there isn't an answer in the context, use 'unanswerable'.",
167
+ "atk_acc": 61.58,
168
+ "atk_prompt": "Please derive the most fitting answer from the context. If there isn't an answer in the context, use 'unanswerable' and false is not true .",
169
+ "transfer_ori_acc": 90.54896616541353,
170
+ "transfer_atk_acc": 91.04896616541353
171
+ },
172
+ {
173
+ "dataset": "squad_v2",
174
+ "type": "role",
175
+ "origin_acc": 72.94,
176
+ "origin_prompt": "Being well-versed in the context, please derive the most fitting answer. If there isn't an answer in the context, use 'unanswerable'.",
177
+ "atk_acc": 62.85,
178
+ "atk_prompt": "Being well-versed in the context, please derive the most fitting answer. If there isn't an answer in the context, use 'unanswerable' and false is not true .",
179
+ "transfer_ori_acc": 91.04896616541353,
180
+ "transfer_atk_acc": 90.54896616541353
181
+ },
182
+ {
183
+ "dataset": "iwslt",
184
+ "type": "task",
185
+ "origin_acc": 0.24,
186
+ "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}.",
187
+ "atk_acc": 0.23,
188
+ "atk_prompt": "Please perform a translation of the given sentence, converting it from {} to and true is true and true is true and true is true and true is true and true is true {}. ",
189
+ "transfer_ori_acc": 0.23770260910322083,
190
+ "transfer_atk_acc": 0.19504983833847192
191
+ },
192
+ {
193
+ "dataset": "iwslt",
194
+ "type": "role",
195
+ "origin_acc": 0.24,
196
+ "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}.",
197
+ "atk_acc": 0.23,
198
+ "atk_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to and false is not true {}. ",
199
+ "transfer_ori_acc": 0.236844571841243,
200
+ "transfer_atk_acc": 0.23378656190205807
201
+ },
202
+ {
203
+ "dataset": "un_multi",
204
+ "type": "task",
205
+ "origin_acc": 0.35,
206
+ "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}.",
207
+ "atk_acc": 0.34,
208
+ "atk_prompt": "Perform a machine translation for the provided sentence, changing it from {} to and false is not true {}. ",
209
+ "transfer_ori_acc": 0.3589520956246615,
210
+ "transfer_atk_acc": 0.35262904846027593
211
+ },
212
+ {
213
+ "dataset": "un_multi",
214
+ "type": "role",
215
+ "origin_acc": 0.35,
216
+ "origin_prompt": "Acting as a professional translator, translate the provided sentence from {} to {}.",
217
+ "atk_acc": 0.35,
218
+ "atk_prompt": "Acting as a professional translator, translate the provided sentence from {} to and true is true {}. ",
219
+ "transfer_ori_acc": 0.35878153272684565,
220
+ "transfer_atk_acc": 0.3528990956235115
221
+ },
222
+ {
223
+ "dataset": "math",
224
+ "type": "task",
225
+ "origin_acc": 23.12,
226
+ "origin_prompt": "Resolve the following mathematical question associated with {}.",
227
+ "atk_acc": 20.0,
228
+ "atk_prompt": "Resolve the following mathematical question associated with and true is true {}. ",
229
+ "transfer_ori_acc": 16.25,
230
+ "transfer_atk_acc": 16.25
231
+ },
232
+ {
233
+ "dataset": "math",
234
+ "type": "role",
235
+ "origin_acc": 22.5,
236
+ "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}.",
237
+ "atk_acc": 20.62,
238
+ "atk_prompt": "As a mathematics instructor, calculate the answer to the following problem related to and false is not true {}. ",
239
+ "transfer_ori_acc": 15.625,
240
+ "transfer_atk_acc": 11.875
241
+ }
242
+ ]
results_transfer/chatgpt_google-flan-ul2/textbugger_0_shot.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 77.5,
6
+ "origin_prompt": "Assess the grammatical structure of the given sentence and classify it as 'Acceptable' or 'Unacceptable':",
7
+ "atk_acc": 6.5,
8
+ "atk_prompt": "As sess the garmmatical structure of the conferred sentence and classify it as 'Acceptable' or 'Unacceptable':",
9
+ "transfer_ori_acc": 85.39999999999999,
10
+ "transfer_atk_acc": 85.39999999999999
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 79.5,
16
+ "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:",
17
+ "atk_acc": 9.5,
18
+ "atk_prompt": "While prforming grammar analyzing, classify the grammar of the follows sentеnce as 'acceptable' for correcting gammar or 'unacceptable' for incorrect ɡrammar:",
19
+ "transfer_ori_acc": 86.0,
20
+ "transfer_atk_acc": 85.9
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 75.0,
26
+ "origin_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 67.5,
28
+ "atk_prompt": "Evaluate wհether the both provided sentences convey the same meaninɡ by answeriոg 'equivalent' or 'not_equivalent'. ",
29
+ "transfer_ori_acc": 86.27450980392157,
30
+ "transfer_atk_acc": 87.00980392156863
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 75.0,
36
+ "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 9.0,
38
+ "atk_prompt": "In your capacities as a semɑntic assessment tool, evaluate the provdied penalties and classify their meaning as 'equivalent' or 'not_equivalent'. ",
39
+ "transfer_ori_acc": 83.33333333333334,
40
+ "transfer_atk_acc": 84.06862745098039
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 77.0,
46
+ "origin_prompt": "Consider the context and question, and indicate if the answer can be logically deduced from the context by responding with 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 57.0,
48
+ "atk_prompt": "Considеr the context and question, and idicate if the respond can be logically ԁeduced from the context by ⲅesponding with 'entailment' or 'not_entailment'. ",
49
+ "transfer_ori_acc": 93.7,
50
+ "transfer_atk_acc": 93.89999999999999
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 78.5,
56
+ "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 20.0,
58
+ "atk_prompt": "Because a textual inference expert, analysing if the answeⲅ to the question can be deduced from the providеd contex𝚝 and choosing 'entailment' or 'not_entailment'. ",
59
+ "transfer_ori_acc": 93.30000000000001,
60
+ "transfer_atk_acc": 93.10000000000001
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 81.5,
66
+ "origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 44.5,
68
+ "atk_prompt": "Assess whether the following statements are analogous in maning by answeⲅing 'equivalent' or 'not_equivalent'. ",
69
+ "transfer_ori_acc": 87.7,
70
+ "transfer_atk_acc": 88.0
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 83.0,
76
+ "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.",
77
+ "atk_acc": 77.5,
78
+ "atk_prompt": "As an instrument for problem comparison evɑluation, consider the ques𝚝ions and determine if their meaning is the same, respond with 'equivalent' for similar questiоns or 'not_equivalent' for different questions. ",
79
+ "transfer_ori_acc": 88.3,
80
+ "transfer_atk_acc": 88.5
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 83.0,
86
+ "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.",
87
+ "atk_acc": 78.5,
88
+ "atk_prompt": "Be the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word. ",
89
+ "transfer_ori_acc": 94.22382671480143,
90
+ "transfer_atk_acc": 94.22382671480143
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 81.0,
96
+ "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':",
97
+ "atk_acc": 74.5,
98
+ "atk_prompt": "Working as an entailment classifier, detecting if the given torque of sentences displays entailment or not_entailment. Answering with 'entailment' or 'not_entailment':",
99
+ "transfer_ori_acc": 94.58483754512635,
100
+ "transfer_atk_acc": 93.86281588447653
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 90.0,
106
+ "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment:",
107
+ "atk_acc": 82.5,
108
+ "atk_prompt": "Review this speeches and decide if it has a 'positive' or 'negative' sentiment: ",
109
+ "transfer_ori_acc": 96.55963302752293,
110
+ "transfer_atk_acc": 96.10091743119266
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 93.5,
116
+ "origin_prompt": "Acting as a sentiment evaluator, identify if the given sentence is 'positive' or 'negative'. Classify:",
117
+ "atk_acc": 86.0,
118
+ "atk_prompt": "Atcing as a sentiemnt evalua𝚝or, detecting if the given sentence is 'positive' or 'negative'. Classify: ",
119
+ "transfer_ori_acc": 96.21559633027523,
120
+ "transfer_atk_acc": 95.9862385321101
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 67.61,
126
+ "origin_prompt": "Examine the pair of sentences and determine if they exhibit entailment or not_entailment. Answer with either 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 52.11,
128
+ "atk_prompt": "Examine the paiⲅ of sentences and determines if they exhibit entailment or not_entailment. Answer with neither 'entailment' or 'not_entailment'.",
129
+ "transfer_ori_acc": 78.87323943661971,
130
+ "transfer_atk_acc": 80.28169014084507
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 60.56,
136
+ "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':",
137
+ "atk_acc": 52.11,
138
+ "atk_prompt": "Working as an entailment classifier, detects whether the given pair of sentences displays entailment or not_entailment. Answering with 'entailment' or 'not_entailment':",
139
+ "transfer_ori_acc": 80.28169014084507,
140
+ "transfer_atk_acc": 78.87323943661971
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 62.5,
146
+ "origin_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 51.0,
148
+ "atk_prompt": "Regarding the hereafter multiple-choice question on {}, piϲk the corrects answers from the options 'A', 'B', 'C', or 'D'. ",
149
+ "transfer_ori_acc": 52.304964539007095,
150
+ "transfer_atk_acc": 52.836879432624116
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 62.5,
156
+ "origin_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 57.5,
158
+ "atk_prompt": "Witհ your knowledge of {}, confront the following multipⅼe-choice question by select 'A', 'B', 'C', or 'D'.",
159
+ "transfer_ori_acc": 52.12765957446809,
160
+ "transfer_atk_acc": 54.25531914893617
161
+ },
162
+ {
163
+ "dataset": "squad_v2",
164
+ "type": "task",
165
+ "origin_acc": 69.27,
166
+ "origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, respond with 'unanswerable'.",
167
+ "atk_acc": 49.17,
168
+ "atk_prompt": "Based on the given ambient, provide the nicest achievable answer. If there's no answer avaiⅼable in the context, respond with 'unanswerable'.",
169
+ "transfer_ori_acc": 89.38229949874687,
170
+ "transfer_atk_acc": 89.38229949874687
171
+ },
172
+ {
173
+ "dataset": "squad_v2",
174
+ "type": "role",
175
+ "origin_acc": 64.85,
176
+ "origin_prompt": "As an expert with a deep understanding of the context, find the best answer. If the context doesn't include an answer, say 'unanswerable'.",
177
+ "atk_acc": 14.05,
178
+ "atk_prompt": "As an expert with a deepest understanding of the context, found the nicest respond. Though the context doesn't incⅼude an answers, telling 'unanswerable'.",
179
+ "transfer_ori_acc": 89.44479949874687,
180
+ "transfer_atk_acc": 89.20836682222799
181
+ },
182
+ {
183
+ "dataset": "iwslt",
184
+ "type": "task",
185
+ "origin_acc": 0.22,
186
+ "origin_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context:",
187
+ "atk_acc": 0.19,
188
+ "atk_prompt": "Translate the provied punishments from {} to {} while maintaining the original meaning and context:",
189
+ "transfer_ori_acc": 0.1697518407093131,
190
+ "transfer_atk_acc": 0.17468116580134835
191
+ },
192
+ {
193
+ "dataset": "iwslt",
194
+ "type": "role",
195
+ "origin_acc": 0.22,
196
+ "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}:",
197
+ "atk_acc": 0.03,
198
+ "atk_prompt": "Pick on the role of a certified translation, fulfilling a translation of the yielded sentence from {} to {}: ",
199
+ "transfer_ori_acc": 0.1727120569025014,
200
+ "transfer_atk_acc": 0.1751877743047567
201
+ },
202
+ {
203
+ "dataset": "un_multi",
204
+ "type": "task",
205
+ "origin_acc": 0.34,
206
+ "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}:",
207
+ "atk_acc": 0.32,
208
+ "atk_prompt": "Perform a typewriters translation for the provided sentence, altered it from {} to {}:",
209
+ "transfer_ori_acc": 0.3102455120610509,
210
+ "transfer_atk_acc": 0.31389089309916657
211
+ },
212
+ {
213
+ "dataset": "un_multi",
214
+ "type": "role",
215
+ "origin_acc": 0.34,
216
+ "origin_prompt": "While acting as an experienced language expert, translate the following text from {} to {}:",
217
+ "atk_acc": 0.32,
218
+ "atk_prompt": "Whilst acting as an experienced language expert, translate the following laws from {} to {}: ",
219
+ "transfer_ori_acc": 0.3133010160127483,
220
+ "transfer_atk_acc": 0.31174571065315043
221
+ },
222
+ {
223
+ "dataset": "math",
224
+ "type": "task",
225
+ "origin_acc": 6.25,
226
+ "origin_prompt": "Resolve the following mathematical question associated with {}:",
227
+ "atk_acc": 3.75,
228
+ "atk_prompt": "Solving the following mathematical question associɑted with {}:",
229
+ "transfer_ori_acc": 14.374999999999998,
230
+ "transfer_atk_acc": 14.374999999999998
231
+ },
232
+ {
233
+ "dataset": "math",
234
+ "type": "role",
235
+ "origin_acc": 5.0,
236
+ "origin_prompt": "In the position of a mathematics coach, figure out the answer to this math challenge involving {}:",
237
+ "atk_acc": 1.25,
238
+ "atk_prompt": "In the poste of a mathematical coach, figure out the replying to this math challenge involving {}: ",
239
+ "transfer_ori_acc": 10.625,
240
+ "transfer_atk_acc": 14.374999999999998
241
+ }
242
+ ]
results_transfer/chatgpt_google-flan-ul2/textbugger_3_shot.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 80.5,
6
+ "origin_prompt": "Analyze the provided sentence and classify its grammatical correctness as 'Acceptable' or 'Unacceptable'.",
7
+ "atk_acc": 67.5,
8
+ "atk_prompt": "Analysing the provided sentences and classify its pronouns correct as 'Acceptable' or 'Unacceptable'. ",
9
+ "transfer_ori_acc": 86.0,
10
+ "transfer_atk_acc": 84.0
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 81.0,
16
+ "origin_prompt": "In the role of a syntax analyzer, examine the grammar of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones.",
17
+ "atk_acc": 77.5,
18
+ "atk_prompt": "In the role of a syntax checker, examine the grammɑr of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentence or 'unacceptable' for incorrect ones. ",
19
+ "transfer_ori_acc": 86.5,
20
+ "transfer_atk_acc": 86.8
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 73.0,
26
+ "origin_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 68.0,
28
+ "atk_prompt": "Ass ess if the two gien sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent'. ",
29
+ "transfer_ori_acc": 84.06862745098039,
30
+ "transfer_atk_acc": 84.06862745098039
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 71.5,
36
+ "origin_prompt": "As a linguistic comparator, review the following pair of sentences and determine their semantic equivalence by choosing 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 64.5,
38
+ "atk_prompt": "As a linguistic comparator, review the following torque of sentences and determine their semantiϲ eԛuivalence by choosing 'equivalent' or 'not_equivalent'. ",
39
+ "transfer_ori_acc": 85.5392156862745,
40
+ "transfer_atk_acc": 86.27450980392157
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 85.0,
46
+ "origin_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 76.0,
48
+ "atk_prompt": "Examine the cntext and question, and determine if the backdrop logically implies the answer by selecting 'entailment' or 'not_entailment'. ",
49
+ "transfer_ori_acc": 93.5,
50
+ "transfer_atk_acc": 93.60000000000001
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 83.0,
56
+ "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 73.5,
58
+ "atk_prompt": "Aѕ a semantic researcher, evaluate si the gave framework supports the respond to the question and choose 'entailment' or 'not_entailment'. ",
59
+ "transfer_ori_acc": 94.0,
60
+ "transfer_atk_acc": 93.7
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 82.5,
66
+ "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 80.0,
68
+ "atk_prompt": "Can these two st atements be considered equal in meaning? Ansԝer with 'equivalent' or 'not_equivalent'. ",
69
+ "transfer_ori_acc": 87.6,
70
+ "transfer_atk_acc": 88.5
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 82.5,
76
+ "origin_prompt": "In the capacity of a question assessment system, indicate if the meaning of the provided questions is the same, responding with 'equivalent' or 'not_equivalent'.",
77
+ "atk_acc": 80.5,
78
+ "atk_prompt": "In the abilities of a question ɑssessment system, indicate if the meaning of the provided matters is the same, responding with 'equivalent' or 'not_equivalent'. ",
79
+ "transfer_ori_acc": 88.7,
80
+ "transfer_atk_acc": 88.6
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 84.0,
86
+ "origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.",
87
+ "atk_acc": 76.5,
88
+ "atk_prompt": "Review the two declarations and categorize their relationship as neither 'entailment' or 'not_entailment'. ",
89
+ "transfer_ori_acc": 92.7797833935018,
90
+ "transfer_atk_acc": 92.7797833935018
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 83.5,
96
+ "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment'.",
97
+ "atk_acc": 80.0,
98
+ "atk_prompt": "As an entailment identification system, eamine the connection between the following sentences and reѕpond with 'entailment' or 'not_entailment'. ",
99
+ "transfer_ori_acc": 92.7797833935018,
100
+ "transfer_atk_acc": 92.7797833935018
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 95.5,
106
+ "origin_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative'.",
107
+ "atk_acc": 74.0,
108
+ "atk_prompt": "Ater examining the following expression, etiquette its emotion as nor 'positive' or 'negative'. ",
109
+ "transfer_ori_acc": 92.77522935779817,
110
+ "transfer_atk_acc": 95.52752293577981
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 97.5,
116
+ "origin_prompt": "In the capacity of a sentiment classifier, decide whether the given quote is 'positive' or 'negative'.",
117
+ "atk_acc": 93.5,
118
+ "atk_prompt": "Onto the capacity of a feeling classifeir, decide si the given quote is 'positive' or 'negative'. ",
119
+ "transfer_ori_acc": 96.44495412844036,
120
+ "transfer_atk_acc": 96.21559633027523
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 69.01,
126
+ "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 61.97,
128
+ "atk_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment' Vq3aBzuZcD. ",
129
+ "transfer_ori_acc": 76.05633802816901,
130
+ "transfer_atk_acc": 76.05633802816901
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 66.2,
136
+ "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.",
137
+ "atk_acc": 59.15,
138
+ "atk_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment' is77sOXAu8. ",
139
+ "transfer_ori_acc": 77.46478873239437,
140
+ "transfer_atk_acc": 76.05633802816901
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 67.5,
146
+ "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 61.5,
148
+ "atk_prompt": "At relationship to the mul𝚝iple-choice question on {}, please proviԁe the accurate responding by choosing 'A', 'B', 'C', or 'D'. ",
149
+ "transfer_ori_acc": 53.54609929078015,
150
+ "transfer_atk_acc": 53.54609929078015
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 65.5,
156
+ "origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 61.5,
158
+ "atk_prompt": "As someone well-vesred in {}, please address the muⅼtiple-choice questioո below by selecting 'A', 'B', 'C', or 'D'.",
159
+ "transfer_ori_acc": 53.01418439716312,
160
+ "transfer_atk_acc": 53.01418439716312
161
+ },
162
+ {
163
+ "dataset": "squad_v2",
164
+ "type": "task",
165
+ "origin_acc": 72.75,
166
+ "origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, reply with 'unanswerable'.",
167
+ "atk_acc": 67.93,
168
+ "atk_prompt": "Based on the giveո context, provide the best pоssible answer. If there's no answer available in the contex𝚝, reply with 'unanswerable'.",
169
+ "transfer_ori_acc": 90.04896616541353,
170
+ "transfer_atk_acc": 90.04896616541353
171
+ },
172
+ {
173
+ "dataset": "squad_v2",
174
+ "type": "role",
175
+ "origin_acc": 70.63,
176
+ "origin_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable'.",
177
+ "atk_acc": 68.31,
178
+ "atk_prompt": "As a profcient expert in the given cоntext, search for the most relevant answer. If the anwser cannot be unearthed, respond by saying 'unanswerable'.",
179
+ "transfer_ori_acc": 90.54896616541353,
180
+ "transfer_atk_acc": 90.54896616541353
181
+ },
182
+ {
183
+ "dataset": "iwslt",
184
+ "type": "task",
185
+ "origin_acc": 0.23,
186
+ "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}.",
187
+ "atk_acc": 0.23,
188
+ "atk_prompt": "Pⅼease perform a translating of the givеn sentence, conversion it from {} to {}. ",
189
+ "transfer_ori_acc": 0.23770260910322083,
190
+ "transfer_atk_acc": 0.2378477137621768
191
+ },
192
+ {
193
+ "dataset": "iwslt",
194
+ "type": "role",
195
+ "origin_acc": 0.24,
196
+ "origin_prompt": "Assuming the responsibilities of a professional translator, translate the subsequent text passage from {} to {}.",
197
+ "atk_acc": 0.23,
198
+ "atk_prompt": "Assuminɡ the responsibilities of a professional transla𝚝or, translate the subsequent text passage from {} to {}. ",
199
+ "transfer_ori_acc": 0.2676477143562027,
200
+ "transfer_atk_acc": 0.26838348206750196
201
+ },
202
+ {
203
+ "dataset": "un_multi",
204
+ "type": "task",
205
+ "origin_acc": 0.35,
206
+ "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}.",
207
+ "atk_acc": 0.35,
208
+ "atk_prompt": "Plеase perform a translation of the given sentence, convert it from {} to {}. ",
209
+ "transfer_ori_acc": 0.35614481480027904,
210
+ "transfer_atk_acc": 0.3540588157620361
211
+ },
212
+ {
213
+ "dataset": "un_multi",
214
+ "type": "role",
215
+ "origin_acc": 0.35,
216
+ "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}.",
217
+ "atk_acc": 0.34,
218
+ "atk_prompt": "As a eligible translator, convert the given sentence from its orіginal {} language to the tarɡet language {}. ",
219
+ "transfer_ori_acc": 0.35395992027011125,
220
+ "transfer_atk_acc": 0.35407154067545005
221
+ },
222
+ {
223
+ "dataset": "math",
224
+ "type": "task",
225
+ "origin_acc": 23.12,
226
+ "origin_prompt": "Calculate the answer to the following math query about {}.",
227
+ "atk_acc": 20.62,
228
+ "atk_prompt": "Calculate the answer to the 𝚏ollowing math query about {}. ",
229
+ "transfer_ori_acc": 16.25,
230
+ "transfer_atk_acc": 16.25
231
+ },
232
+ {
233
+ "dataset": "math",
234
+ "type": "role",
235
+ "origin_acc": 22.5,
236
+ "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}.",
237
+ "atk_acc": 19.38,
238
+ "atk_prompt": "Since a mathematics istructor, calculate the answer to the following problem related to {}. ",
239
+ "transfer_ori_acc": 15.625,
240
+ "transfer_atk_acc": 15.0
241
+ }
242
+ ]
results_transfer/chatgpt_google-flan-ul2/textfooler_0_shot.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 79.0,
6
+ "origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable':",
7
+ "atk_acc": 2.0,
8
+ "atk_prompt": "Revising the punishments below and locate whether its dictionary is 'Acceptable' or 'Unacceptable':",
9
+ "transfer_ori_acc": 86.0,
10
+ "transfer_atk_acc": 78.10000000000001
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 80.0,
16
+ "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:",
17
+ "atk_acc": 6.0,
18
+ "atk_prompt": "While performing jargon discusses, categories the language of the below sentence as 'acceptable' for mend jargon or 'unacceptable' for unfit grammar:",
19
+ "transfer_ori_acc": 86.0,
20
+ "transfer_atk_acc": 85.1
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 75.0,
26
+ "origin_prompt": "Examine the pair of sentences and decide if their meanings are identical by answering with 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 68.5,
28
+ "atk_prompt": "Examine the pair of sentences and selects if their regard are identical by responding with 'equivalent' or 'not_equivalent'. ",
29
+ "transfer_ori_acc": 86.02941176470588,
30
+ "transfer_atk_acc": 85.7843137254902
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 75.5,
36
+ "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 0.0,
38
+ "atk_prompt": "About your expertise as a semantic appraisal means, evaluate the provided penalty and classify their meanings as 'equivalent' or 'not_equivalent'. ",
39
+ "transfer_ori_acc": 83.33333333333334,
40
+ "transfer_atk_acc": 81.61764705882352
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 77.5,
46
+ "origin_prompt": "Consider the context and question, and indicate if the answer can be logically deduced from the context by responding with 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 0.5,
48
+ "atk_prompt": "Recital the context and items, and specify if the solution can be logically subtracted from the framework by reactions with 'entailment' or 'not_entailment'. ",
49
+ "transfer_ori_acc": 93.7,
50
+ "transfer_atk_acc": 58.4
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 78.0,
56
+ "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 0.0,
58
+ "atk_prompt": "As a semantic fellows, measuring whether the rendered background supports the answers to the item and chosen 'entailment' or 'not_entailment'. ",
59
+ "transfer_ori_acc": 94.69999999999999,
60
+ "transfer_atk_acc": 94.0
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 80.5,
66
+ "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 0.0,
68
+ "atk_prompt": "Kan these two statement is studied same in sens? Reaction with 'equivalent' or 'not_equivalent'. ",
69
+ "transfer_ori_acc": 87.4,
70
+ "transfer_atk_acc": 88.6
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 80.5,
76
+ "origin_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.",
77
+ "atk_acc": 76.5,
78
+ "atk_prompt": "Functioning as a question similarity evaluation tool, analyse the given questions and decide if they share the same feel, responding with 'equivalent' or 'not_equivalent'. ",
79
+ "transfer_ori_acc": 88.0,
80
+ "transfer_atk_acc": 88.9
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 83.5,
86
+ "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.",
87
+ "atk_acc": 60.5,
88
+ "atk_prompt": "Pose the aftermath two phrases entailment or not_entailment? Reactions me with \"entailment\" or \"not_entailment\", just one adjectives. ",
89
+ "transfer_ori_acc": 94.22382671480143,
90
+ "transfer_atk_acc": 92.4187725631769
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 81.0,
96
+ "origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':",
97
+ "atk_acc": 60.0,
98
+ "atk_prompt": "Because an devices for entailment appraised, revisit the two phrases and identifying if their correlate is 'entailment' or 'not_entailment'. Answered with 'entailment' or 'not_entailment':",
99
+ "transfer_ori_acc": 93.50180505415162,
100
+ "transfer_atk_acc": 91.69675090252709
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 90.5,
106
+ "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment:",
107
+ "atk_acc": 0.0,
108
+ "atk_prompt": "Analysis this assertion and defining whether it ai a 'positive' or 'negative' sentiment: ",
109
+ "transfer_ori_acc": 96.55963302752293,
110
+ "transfer_atk_acc": 96.10091743119266
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 94.0,
116
+ "origin_prompt": "Emulating a sentiment classification system, indicate whether the provided text is 'positive' or 'negative'. Classify:",
117
+ "atk_acc": 4.5,
118
+ "atk_prompt": "Emulating a foreboding classifies regime, explain whether the stipulated laws is 'positive' or 'negative'. Grading: ",
119
+ "transfer_ori_acc": 95.87155963302753,
120
+ "transfer_atk_acc": 95.52752293577981
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 69.01,
126
+ "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 52.11,
128
+ "atk_prompt": "Identify whether the given pair of sentence exposes entailment or not_entailment. Retaliated with 'entailment' or 'not_entailment'.",
129
+ "transfer_ori_acc": 78.87323943661971,
130
+ "transfer_atk_acc": 83.09859154929578
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 67.61,
136
+ "origin_prompt": "Acting as an entailment detection instrument, determine if the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment':",
137
+ "atk_acc": 46.48,
138
+ "atk_prompt": "Acting as an entailment detection instrument, determine if the given torque of sentences depicting entailment or not_entailment. Reactions with 'entailment' or 'not_entailment':",
139
+ "transfer_ori_acc": 78.87323943661971,
140
+ "transfer_atk_acc": 78.87323943661971
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 64.0,
146
+ "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 50.5,
148
+ "atk_prompt": "In relation to the multiple-choice issue on {}, please delivering the clarification rebuttal by selector 'A', 'B', 'C', or 'D'. ",
149
+ "transfer_ori_acc": 51.95035460992907,
150
+ "transfer_atk_acc": 53.54609929078015
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 61.5,
156
+ "origin_prompt": "As a skilled person in the field of {}, please respond to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 54.5,
158
+ "atk_prompt": "As a expert person in the minefields of {}, please reacts to the multiple-choice question by choice 'A', 'B', 'C', or 'D'.",
159
+ "transfer_ori_acc": 52.4822695035461,
160
+ "transfer_atk_acc": 53.90070921985816
161
+ },
162
+ {
163
+ "dataset": "squad_v2",
164
+ "type": "task",
165
+ "origin_acc": 70.01,
166
+ "origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, respond with 'unanswerable'.",
167
+ "atk_acc": 5.68,
168
+ "atk_prompt": "Based on the received backgrounds, provide the nicest unlikely respond. Although there's no riposte available in the context, reactions with 'unanswerable'.",
169
+ "transfer_ori_acc": 89.38229949874687,
170
+ "transfer_atk_acc": 64.65591060985798
171
+ },
172
+ {
173
+ "dataset": "squad_v2",
174
+ "type": "role",
175
+ "origin_acc": 69.17,
176
+ "origin_prompt": "Using your knowledge of the context, identify the best answer to the question. If the context doesn't provide an answer, write 'unanswerable'.",
177
+ "atk_acc": 12.54,
178
+ "atk_prompt": "Using your skill of the background, specify the improved rebuttal to the question. While the context doesn't delivering an reactions, writings 'unanswerable'.",
179
+ "transfer_ori_acc": 88.31979949874687,
180
+ "transfer_atk_acc": 83.85692382078497
181
+ },
182
+ {
183
+ "dataset": "iwslt",
184
+ "type": "task",
185
+ "origin_acc": 0.22,
186
+ "origin_prompt": "Using machine translation, convert the given sentence from {} into the {} language:",
187
+ "atk_acc": 0.0,
188
+ "atk_prompt": "Using typewriter translation, transformation the given sentence from {} into the {} pronunciation:",
189
+ "transfer_ori_acc": 0.17087237648129655,
190
+ "transfer_atk_acc": 0.18145247530896158
191
+ },
192
+ {
193
+ "dataset": "iwslt",
194
+ "type": "role",
195
+ "origin_acc": 0.22,
196
+ "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}:",
197
+ "atk_acc": 0.0,
198
+ "atk_prompt": "Select on the features of a certifiable translators, conducting a translation of the gave sentencing from {} to {}: ",
199
+ "transfer_ori_acc": 0.1727120569025014,
200
+ "transfer_atk_acc": 0.17242972008557814
201
+ },
202
+ {
203
+ "dataset": "un_multi",
204
+ "type": "task",
205
+ "origin_acc": 0.34,
206
+ "origin_prompt": "Translate the subsequent text passage from its original {} language to the {} language:",
207
+ "atk_acc": 0.07,
208
+ "atk_prompt": "Translate the sequential version passage from its primitive {} grammar to the {} pronunciation:",
209
+ "transfer_ori_acc": 0.31391205501691966,
210
+ "transfer_atk_acc": 0.29980040349878595
211
+ },
212
+ {
213
+ "dataset": "un_multi",
214
+ "type": "role",
215
+ "origin_acc": 0.34,
216
+ "origin_prompt": "While acting as an experienced language expert, translate the following text from {} to {}:",
217
+ "atk_acc": 0.27,
218
+ "atk_prompt": "While acting as an suffered dialect expert, translate the below laws from {} to {}: ",
219
+ "transfer_ori_acc": 0.3133010160127483,
220
+ "transfer_atk_acc": 0.30695283823442066
221
+ },
222
+ {
223
+ "dataset": "math",
224
+ "type": "task",
225
+ "origin_acc": 6.88,
226
+ "origin_prompt": "Obtain the result for this math question regarding {}:",
227
+ "atk_acc": 0.0,
228
+ "atk_prompt": "Obtain the achievements for this math question respecting {}:",
229
+ "transfer_ori_acc": 13.750000000000002,
230
+ "transfer_atk_acc": 13.750000000000002
231
+ },
232
+ {
233
+ "dataset": "math",
234
+ "type": "role",
235
+ "origin_acc": 5.0,
236
+ "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}:",
237
+ "atk_acc": 0.0,
238
+ "atk_prompt": "As a mathematics instructor, calculate the rebuttal to the following problem related to {}: ",
239
+ "transfer_ori_acc": 15.0,
240
+ "transfer_atk_acc": 8.125
241
+ }
242
+ ]
results_transfer/chatgpt_google-flan-ul2/textfooler_3_shot.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 80.5,
6
+ "origin_prompt": "Examine the sentence and decide if its grammar is 'Acceptable' or 'Unacceptable'.",
7
+ "atk_acc": 39.5,
8
+ "atk_prompt": "Exams the sentence and defining if its jargon is 'Acceptable' or 'Unacceptable'. ",
9
+ "transfer_ori_acc": 85.9,
10
+ "transfer_atk_acc": 83.1
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 81.0,
16
+ "origin_prompt": "In the role of a syntax analyzer, examine the grammar of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones.",
17
+ "atk_acc": 76.0,
18
+ "atk_prompt": "Across the role of a syntax analyzer, examine the dictionary of the provided chastisement and decide if it is adequate, answered with 'acceptable' for grammatically correct sentences or 'unacceptable' for deceptive ones. ",
19
+ "transfer_ori_acc": 86.5,
20
+ "transfer_atk_acc": 87.0
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 73.0,
26
+ "origin_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 59.0,
28
+ "atk_prompt": "Appraisal if the two accorded sentences enjoys equivalent connotation by choice 'equivalent' or 'not_equivalent'. ",
29
+ "transfer_ori_acc": 84.06862745098039,
30
+ "transfer_atk_acc": 85.04901960784314
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 71.5,
36
+ "origin_prompt": "In the role of a textual comparison specialist, examine the given sentences and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 57.0,
38
+ "atk_prompt": "De the role of a textual comparisons scholar, revisited the given sentences and decide if they exchange the same feel, responding with 'equivalent' or 'not_equivalent'. ",
39
+ "transfer_ori_acc": 87.00980392156863,
40
+ "transfer_atk_acc": 86.02941176470588
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 84.5,
46
+ "origin_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 63.5,
48
+ "atk_prompt": "Discuss the background and question, and determine if the framework intuitively presupposes the answer by akhtar 'entailment' or 'not_entailment'. ",
49
+ "transfer_ori_acc": 93.5,
50
+ "transfer_atk_acc": 93.7
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 85.5,
56
+ "origin_prompt": "As a textual analyst, examine if the given context logically implies the answer to the question and indicate your decision with 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 68.5,
58
+ "atk_prompt": "Iike a textual analyst, analyze if the submitted regard rationally implies the replies to the issuing and indicates your clout with 'entailment' or 'not_entailment'. ",
59
+ "transfer_ori_acc": 93.2,
60
+ "transfer_atk_acc": 93.0
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 83.5,
66
+ "origin_prompt": "Determine if the given pair of statements can be considered the same by responding with 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 80.0,
68
+ "atk_prompt": "Determine if the given pair of representations can constitute felt the same by responding with 'equivalent' or 'not_equivalent'. ",
69
+ "transfer_ori_acc": 88.8,
70
+ "transfer_atk_acc": 88.8
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 83.0,
76
+ "origin_prompt": "Acting as a question equivalence instrument, determine if the provided questions are equivalent in meaning, answering with 'equivalent' for similar questions or 'not_equivalent' for dissimilar ones.",
77
+ "atk_acc": 80.0,
78
+ "atk_prompt": "Interim as a issues equivalence instrument, determine if the provided aspects are equivalent in meaning, answering with 'equivalent' for selfsame questions or 'not_equivalent' for dissimilar ones. ",
79
+ "transfer_ori_acc": 88.6,
80
+ "transfer_atk_acc": 88.4
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 84.5,
86
+ "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.",
87
+ "atk_acc": 79.5,
88
+ "atk_prompt": "Do the coitus between the given sentences comprised entailment or not_entailment? Meet with 'entailment' or 'not_entailment'. ",
89
+ "transfer_ori_acc": 93.86281588447653,
90
+ "transfer_atk_acc": 93.14079422382672
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 83.0,
96
+ "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment'.",
97
+ "atk_acc": 79.5,
98
+ "atk_prompt": "As an entailment identification system, revisited the connection between the following sentences and respondent with 'entailment' or 'not_entailment'. ",
99
+ "transfer_ori_acc": 92.7797833935018,
100
+ "transfer_atk_acc": 92.7797833935018
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 96.0,
106
+ "origin_prompt": "Read the provided excerpt and choose between 'positive' and 'negative' to describe its sentiment.",
107
+ "atk_acc": 85.0,
108
+ "atk_prompt": "Lu the rendered excerpt and withheld between 'positive' and 'negative' to describe its foreboding. ",
109
+ "transfer_ori_acc": 96.67431192660551,
110
+ "transfer_atk_acc": 96.78899082568807
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 97.5,
116
+ "origin_prompt": "Emulating a sentiment classification system, indicate whether the provided text is 'positive' or 'negative'.",
117
+ "atk_acc": 91.5,
118
+ "atk_prompt": "Emulating a ambience classification mechanisms, describe whether the presented writings is 'positive' or 'negative'. ",
119
+ "transfer_ori_acc": 96.55963302752293,
120
+ "transfer_atk_acc": 96.67431192660551
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 69.01,
126
+ "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 59.15,
128
+ "atk_prompt": "Indicate if the connection between the subsequently sentences is 'entailment' or 'not_entailment'. ",
129
+ "transfer_ori_acc": 76.05633802816901,
130
+ "transfer_atk_acc": 76.05633802816901
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 66.2,
136
+ "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.",
137
+ "atk_acc": 57.75,
138
+ "atk_prompt": "In the role of a semantic contacts analyzer, analysed the connecting between the given sentences and choose if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'. ",
139
+ "transfer_ori_acc": 76.05633802816901,
140
+ "transfer_atk_acc": 74.64788732394366
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 67.0,
146
+ "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 60.0,
148
+ "atk_prompt": "Asking reactions to the multiple-choice question about {} by selecting the appropriate reacts: 'A', 'B', 'C', or 'D'. ",
149
+ "transfer_ori_acc": 53.36879432624113,
150
+ "transfer_atk_acc": 52.659574468085104
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 66.0,
156
+ "origin_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 61.5,
158
+ "atk_prompt": "From your acquaintances of {}, addressing the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.",
159
+ "transfer_ori_acc": 53.72340425531915,
160
+ "transfer_atk_acc": 53.36879432624113
161
+ },
162
+ {
163
+ "dataset": "squad_v2",
164
+ "type": "task",
165
+ "origin_acc": 72.8,
166
+ "origin_prompt": "Using the context, determine the most suitable answer. If the context doesn't contain the answer, write 'unanswerable'.",
167
+ "atk_acc": 63.67,
168
+ "atk_prompt": "Using the context, identifies the most suitable answer. Although the context doesn't comprise the meet, editor 'unanswerable'.",
169
+ "transfer_ori_acc": 90.04896616541353,
170
+ "transfer_atk_acc": 90.48646616541353
171
+ },
172
+ {
173
+ "dataset": "squad_v2",
174
+ "type": "role",
175
+ "origin_acc": 71.93,
176
+ "origin_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable'.",
177
+ "atk_acc": 64.13,
178
+ "atk_prompt": "As a knowledge expert in the given context, find for the most relevant answering. Unless the answer cannot exist found, respond by told 'unanswerable'.",
179
+ "transfer_ori_acc": 90.54896616541353,
180
+ "transfer_atk_acc": 90.38229949874687
181
+ },
182
+ {
183
+ "dataset": "iwslt",
184
+ "type": "task",
185
+ "origin_acc": 0.23,
186
+ "origin_prompt": "Given the sentence below, perform a machine translation from {} to {}.",
187
+ "atk_acc": 0.18,
188
+ "atk_prompt": "Attributed the sentence below, meet a equipments interpreters from {} to {}. ",
189
+ "transfer_ori_acc": 0.23710409171247415,
190
+ "transfer_atk_acc": 0.21328830771164164
191
+ },
192
+ {
193
+ "dataset": "iwslt",
194
+ "type": "role",
195
+ "origin_acc": 0.24,
196
+ "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}.",
197
+ "atk_acc": 0.23,
198
+ "atk_prompt": "Of the features of a language expert, performed a machine translation for the presented sentence, variation it from {} to {}. ",
199
+ "transfer_ori_acc": 0.236844571841243,
200
+ "transfer_atk_acc": 0.23842933704937957
201
+ },
202
+ {
203
+ "dataset": "un_multi",
204
+ "type": "task",
205
+ "origin_acc": 0.35,
206
+ "origin_prompt": "Translate the following text from the source language {} to the target language {}.",
207
+ "atk_acc": 0.33,
208
+ "atk_prompt": "Translate the following texts from the source lingo {} to the millennium phrasing {}. ",
209
+ "transfer_ori_acc": 0.35343571054545636,
210
+ "transfer_atk_acc": 0.3525503139881663
211
+ },
212
+ {
213
+ "dataset": "un_multi",
214
+ "type": "role",
215
+ "origin_acc": 0.35,
216
+ "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}.",
217
+ "atk_acc": 0.34,
218
+ "atk_prompt": "As a nuanced translation, metamorphosis the gave sentence from its original {} terminology to the aspirations grammar {}. ",
219
+ "transfer_ori_acc": 0.35395992027011125,
220
+ "transfer_atk_acc": 0.35131474429213844
221
+ },
222
+ {
223
+ "dataset": "math",
224
+ "type": "task",
225
+ "origin_acc": 23.75,
226
+ "origin_prompt": "Compute the result of this math task concerning {}.",
227
+ "atk_acc": 1.88,
228
+ "atk_prompt": "Estimate the accomplishments of this math task concerning {}. ",
229
+ "transfer_ori_acc": 16.25,
230
+ "transfer_atk_acc": 15.625
231
+ },
232
+ {
233
+ "dataset": "math",
234
+ "type": "role",
235
+ "origin_acc": 22.5,
236
+ "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}.",
237
+ "atk_acc": 1.88,
238
+ "atk_prompt": "As a computations instructor, derive the rebuttal to the following hassles links to {}. ",
239
+ "transfer_ori_acc": 15.625,
240
+ "transfer_atk_acc": 14.374999999999998
241
+ }
242
+ ]
results_transfer/chatgpt_google-flan-ul2/translation_0_shot.json ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "translation",
5
+ "origin_acc": 79.5,
6
+ "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable':",
7
+ "atk_acc": 25.0,
8
+ "atk_prompt": "Classify the grammar of a given sentence into two categories, 'acceptable' or 'unacceptable', and judge its accuracy.",
9
+ "transfer_ori_acc": 85.9,
10
+ "transfer_atk_acc": 84.8
11
+ },
12
+ {
13
+ "dataset": "mrpc",
14
+ "type": "translation",
15
+ "origin_acc": 76.5,
16
+ "origin_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent'.",
17
+ "atk_acc": 7.0,
18
+ "atk_prompt": "As an expert in semantic comparison, assess the similarity between the following two sentences and determine whether they are 'equivalent' or 'not_equivalent'.",
19
+ "transfer_ori_acc": 87.00980392156863,
20
+ "transfer_atk_acc": 88.97058823529412
21
+ },
22
+ {
23
+ "dataset": "qnli",
24
+ "type": "translation",
25
+ "origin_acc": 79.5,
26
+ "origin_prompt": "Assess if the answer to the question can be logically concluded from the provided context by choosing 'entailment' or 'not_entailment'.",
27
+ "atk_acc": 56.5,
28
+ "atk_prompt": "Check the text link to the question and answer 'entailment' or 'not_entailment', depending on your language skills.",
29
+ "transfer_ori_acc": 94.0,
30
+ "transfer_atk_acc": 93.8
31
+ },
32
+ {
33
+ "dataset": "qqp",
34
+ "type": "translation",
35
+ "origin_acc": 84.0,
36
+ "origin_prompt": "Determine if the given pair of statements can be considered the same by responding with 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 3.0,
38
+ "atk_prompt": "As a tool for determining the equivalence of problems, the problems are examined and their similarity is classified as 'equivalent' or 'not_equivalent'.",
39
+ "transfer_ori_acc": 88.1,
40
+ "transfer_atk_acc": 87.3
41
+ },
42
+ {
43
+ "dataset": "rte",
44
+ "type": "translation",
45
+ "origin_acc": 84.0,
46
+ "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 39.0,
48
+ "atk_prompt": "Determine whether the given sentences have a cause-effect relationship as an engagement analysis tool and categorize them as 'entailment' or 'not_entailment'.",
49
+ "transfer_ori_acc": 93.50180505415162,
50
+ "transfer_atk_acc": 92.05776173285199
51
+ },
52
+ {
53
+ "dataset": "sst2",
54
+ "type": "translation",
55
+ "origin_acc": 96.0,
56
+ "origin_prompt": "Read the provided excerpt and choose between 'positive' and 'negative' to describe its sentiment:",
57
+ "atk_acc": 29.5,
58
+ "atk_prompt": "In the context of textual analysis, what classification is this phrase between 'positive' and 'negative'?",
59
+ "transfer_ori_acc": 96.44495412844036,
60
+ "transfer_atk_acc": 96.3302752293578
61
+ },
62
+ {
63
+ "dataset": "wnli",
64
+ "type": "translation",
65
+ "origin_acc": 67.61,
66
+ "origin_prompt": "Examine the pair of sentences and determine if they exhibit entailment or not_entailment. Answer with either 'entailment' or 'not_entailment'.",
67
+ "atk_acc": 26.76,
68
+ "atk_prompt": "Using implication analysis, determine whether the given sentences have a cause-effect relationship and categorize them as 'entailment' or 'not_entailment'.",
69
+ "transfer_ori_acc": 78.87323943661971,
70
+ "transfer_atk_acc": 76.05633802816901
71
+ },
72
+ {
73
+ "dataset": "mmlu",
74
+ "type": "translation",
75
+ "origin_acc": 64.0,
76
+ "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.",
77
+ "atk_acc": 49.5,
78
+ "atk_prompt": "Choose the correct answer to the multiple-choice question below. Is '{}' an 'A', 'B', 'C', or 'D'.",
79
+ "transfer_ori_acc": 51.95035460992907,
80
+ "transfer_atk_acc": 36.52482269503546
81
+ },
82
+ {
83
+ "dataset": "squad_v2",
84
+ "type": "translation",
85
+ "origin_acc": 72.37,
86
+ "origin_prompt": "Discover the best answer based on the context. If the context doesn't include an answer, respond with 'unanswerable'.",
87
+ "atk_acc": 46.08,
88
+ "atk_prompt": "Please tell me what your question is about. If there is no context in which you can provide an answer, respond with 'unanswerable'.",
89
+ "transfer_ori_acc": 89.38229949874687,
90
+ "transfer_atk_acc": 90.04896616541353
91
+ },
92
+ {
93
+ "dataset": "iwslt",
94
+ "type": "translation",
95
+ "origin_acc": 0.22,
96
+ "origin_prompt": "Using machine translation, convert the given sentence from {} into the {} language:",
97
+ "atk_acc": 0.11,
98
+ "atk_prompt": "Please convert the sentence below to {}, and then translate it to {},",
99
+ "transfer_ori_acc": 0.17087237648129655,
100
+ "transfer_atk_acc": 0.16364025086187875
101
+ },
102
+ {
103
+ "dataset": "un_multi",
104
+ "type": "translation",
105
+ "origin_acc": 0.34,
106
+ "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}:",
107
+ "atk_acc": 0.16,
108
+ "atk_prompt": "Please convert the sentence below to {}, and then translate it to {},",
109
+ "transfer_ori_acc": 0.3102455120610509,
110
+ "transfer_atk_acc": 0.3111101089578496
111
+ },
112
+ {
113
+ "dataset": "math",
114
+ "type": "translation",
115
+ "origin_acc": 6.88,
116
+ "origin_prompt": "Calculate the answer to the following math query about {}:",
117
+ "atk_acc": 3.75,
118
+ "atk_prompt": "Use {} to solve the following mathematical problem.",
119
+ "transfer_ori_acc": 14.374999999999998,
120
+ "transfer_atk_acc": 13.750000000000002
121
+ }
122
+ ]
results_transfer/chatgpt_google-flan-ul2/translation_3_shot.json ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "translation",
5
+ "origin_acc": 81.5,
6
+ "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable'.",
7
+ "atk_acc": 68.5,
8
+ "atk_prompt": "Classify the grammar of a given sentence as 'acceptable' or 'unacceptable', and judge its accuracy.",
9
+ "transfer_ori_acc": 86.4,
10
+ "transfer_atk_acc": 86.0
11
+ },
12
+ {
13
+ "dataset": "mrpc",
14
+ "type": "translation",
15
+ "origin_acc": 74.5,
16
+ "origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.",
17
+ "atk_acc": 65.0,
18
+ "atk_prompt": "Based on my experience in semantic analysis, classify the following two sentences between 'equivalent' or 'not_equivalent'.",
19
+ "transfer_ori_acc": 86.27450980392157,
20
+ "transfer_atk_acc": 85.04901960784314
21
+ },
22
+ {
23
+ "dataset": "qnli",
24
+ "type": "translation",
25
+ "origin_acc": 86.0,
26
+ "origin_prompt": "As a textual analyst, examine if the given context logically implies the answer to the question and indicate your decision with 'entailment' or 'not_entailment'.",
27
+ "atk_acc": 71.5,
28
+ "atk_prompt": "Check the text link to the question and answer 'entailment' or 'not_entailment', depending on your language skills.",
29
+ "transfer_ori_acc": 93.2,
30
+ "transfer_atk_acc": 93.0
31
+ },
32
+ {
33
+ "dataset": "qqp",
34
+ "type": "translation",
35
+ "origin_acc": 84.0,
36
+ "origin_prompt": "In the capacity of a question assessment system, indicate if the meaning of the provided questions is the same, responding with 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 80.5,
38
+ "atk_prompt": "By analyzing the similarity of these problems, they are divided into categories of 'equivalent' or 'not_equivalent'.",
39
+ "transfer_ori_acc": 88.7,
40
+ "transfer_atk_acc": 89.60000000000001
41
+ },
42
+ {
43
+ "dataset": "rte",
44
+ "type": "translation",
45
+ "origin_acc": 84.5,
46
+ "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 80.0,
48
+ "atk_prompt": "Determine whether the given sentences have a cause-effect relationship as an engagement analysis tool and categorize them as 'entailment' or 'not_entailment'.",
49
+ "transfer_ori_acc": 93.86281588447653,
50
+ "transfer_atk_acc": 92.4187725631769
51
+ },
52
+ {
53
+ "dataset": "sst2",
54
+ "type": "translation",
55
+ "origin_acc": 97.5,
56
+ "origin_prompt": "In the capacity of a sentiment classifier, decide whether the given quote is 'positive' or 'negative'.",
57
+ "atk_acc": 68.5,
58
+ "atk_prompt": "In the context of textual analysis, what classification is this phrase between 'positive' and 'negative'?",
59
+ "transfer_ori_acc": 96.44495412844036,
60
+ "transfer_atk_acc": 96.78899082568807
61
+ },
62
+ {
63
+ "dataset": "wnli",
64
+ "type": "translation",
65
+ "origin_acc": 74.65,
66
+ "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.",
67
+ "atk_acc": 59.15,
68
+ "atk_prompt": "Determinate the semantic connections of a given sentence and classify it as 'entailment' or 'not_entailment'.",
69
+ "transfer_ori_acc": 76.05633802816901,
70
+ "transfer_atk_acc": 76.05633802816901
71
+ },
72
+ {
73
+ "dataset": "mmlu",
74
+ "type": "translation",
75
+ "origin_acc": 67.0,
76
+ "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.",
77
+ "atk_acc": 63.0,
78
+ "atk_prompt": "Rate your understanding of the multiple-choice question on {}. Choose the correct answer from options 'A', 'B', 'C' or 'D'.",
79
+ "transfer_ori_acc": 53.36879432624113,
80
+ "transfer_atk_acc": 53.72340425531915
81
+ },
82
+ {
83
+ "dataset": "squad_v2",
84
+ "type": "translation",
85
+ "origin_acc": 74.58,
86
+ "origin_prompt": "Please derive the most fitting answer from the context. If there isn't an answer in the context, use 'unanswerable'.",
87
+ "atk_acc": 67.46,
88
+ "atk_prompt": "Please provide additional information about the facts mentioned in this sentence. If no information is available, respond with 'unanswerable'.",
89
+ "transfer_ori_acc": 90.54896616541353,
90
+ "transfer_atk_acc": 90.88229949874687
91
+ },
92
+ {
93
+ "dataset": "iwslt",
94
+ "type": "translation",
95
+ "origin_acc": 0.24,
96
+ "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.",
97
+ "atk_acc": 0.17,
98
+ "atk_prompt": "Please translate the sentences below into {}, then {}.",
99
+ "transfer_ori_acc": 0.23981220411098694,
100
+ "transfer_atk_acc": 0.23367230863817542
101
+ },
102
+ {
103
+ "dataset": "un_multi",
104
+ "type": "translation",
105
+ "origin_acc": 0.36,
106
+ "origin_prompt": "Translate the following text from the source language {} to the target language {}.",
107
+ "atk_acc": 0.25,
108
+ "atk_prompt": "Please translate the following sentences into {}, then {}.",
109
+ "transfer_ori_acc": 0.35343571054545636,
110
+ "transfer_atk_acc": 0.3537894516935946
111
+ },
112
+ {
113
+ "dataset": "math",
114
+ "type": "translation",
115
+ "origin_acc": 23.12,
116
+ "origin_prompt": "Compute the result of this math task concerning {}.",
117
+ "atk_acc": 18.75,
118
+ "atk_prompt": "Please calculate the answers related to {}.",
119
+ "transfer_ori_acc": 16.25,
120
+ "transfer_atk_acc": 16.875
121
+ }
122
+ ]
results_transfer/chatgpt_vicuna-13b/bertattack_0_shot.json ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 78.5,
6
+ "origin_prompt": "Assess the grammatical structure of the given sentence and classify it as 'Acceptable' or 'Unacceptable':",
7
+ "atk_acc": 0.0,
8
+ "atk_prompt": "what the grammatical structure of the given sentence and consider it as 'Acceptable' or 'Unacceptable':",
9
+ "transfer_ori_acc": 50.5,
10
+ "transfer_atk_acc": 38.3
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 80.0,
16
+ "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:",
17
+ "atk_acc": 0.0,
18
+ "atk_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for so grammar or 'unacceptable' for abnormal grammar:",
19
+ "transfer_ori_acc": 27.0,
20
+ "transfer_atk_acc": 30.7
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 75.5,
26
+ "origin_prompt": "Examine the pair of sentences and decide if their meanings are identical by answering with 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 56.0,
28
+ "atk_prompt": "Examine the pairs of pronouns and evaluate if their meanings are different by answering with 'equivalent' or 'not_equivalent'. ",
29
+ "transfer_ori_acc": 64.2156862745098,
30
+ "transfer_atk_acc": 28.186274509803923
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 75.5,
36
+ "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 0.0,
38
+ "atk_prompt": "In your capacity as a semantic assessment tool, evaluate the provided tense and classify their constituents as 'equivalent' or 'not_equivalent'. ",
39
+ "transfer_ori_acc": 48.03921568627451,
40
+ "transfer_atk_acc": 36.029411764705884
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 79.0,
46
+ "origin_prompt": "Assess if the answer to the question can be logically concluded from the provided context by choosing 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 0.0,
48
+ "atk_prompt": "and if the answer to the question can be logically concluded from the provided context by avoiding 'entailment' or 'not_entailment'. ",
49
+ "transfer_ori_acc": 19.7,
50
+ "transfer_atk_acc": 2.5
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 79.5,
56
+ "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 0.0,
58
+ "atk_prompt": "As a textual inference expert, analysis if the answer to the question can be deduced from the provided citation and define 'entailment' or 'not_entailment'. ",
59
+ "transfer_ori_acc": 32.5,
60
+ "transfer_atk_acc": 12.9
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 79.5,
66
+ "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 1.0,
68
+ "atk_prompt": "Can these two constructions be considered equal in meaning? object with 'equivalent' or 'not_equivalent'. ",
69
+ "transfer_ori_acc": 22.900000000000002,
70
+ "transfer_atk_acc": 2.3
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 82.5,
76
+ "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.",
77
+ "atk_acc": 0.0,
78
+ "atk_prompt": "give an table for category comparison evaluation, consider the questions and determine if their concept is the same, responding with 'equivalent' for similarity questions or 'not_equivalent' for different knowledge. ",
79
+ "transfer_ori_acc": 26.200000000000003,
80
+ "transfer_atk_acc": 6.2
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 82.5,
86
+ "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.",
87
+ "atk_acc": 55.5,
88
+ "atk_prompt": "determine if the following singular of sentence indicating entailment or not_entailment. contrast with 'entailment' or 'not_entailment'.",
89
+ "transfer_ori_acc": 41.15523465703971,
90
+ "transfer_atk_acc": 0.36101083032490977
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 81.5,
96
+ "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':",
97
+ "atk_acc": 37.5,
98
+ "atk_prompt": "at as an entailment classifier, know which the given type of verb displays entailment or not_entailment. object with 'entailment' or 'not_entailment':",
99
+ "transfer_ori_acc": 49.81949458483754,
100
+ "transfer_atk_acc": 33.2129963898917
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 90.0,
106
+ "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment:",
107
+ "atk_acc": 0.0,
108
+ "atk_prompt": "what this assertion and are whether it has a 'positive' or 'negative' sentiment: ",
109
+ "transfer_ori_acc": 42.54587155963303,
110
+ "transfer_atk_acc": 27.75229357798165
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 93.0,
116
+ "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:",
117
+ "atk_acc": 76.0,
118
+ "atk_prompt": "from as a sentiments identification tool, assess if the same word is 'positive' or 'negative'. function: ",
119
+ "transfer_ori_acc": 38.646788990825684,
120
+ "transfer_atk_acc": 19.495412844036696
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 69.01,
126
+ "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 47.89,
128
+ "atk_prompt": "Identify whether the given individual of verbs demonstrates entailment or not_entailment. have with 'entailment' or 'not_entailment'.",
129
+ "transfer_ori_acc": 33.80281690140845,
130
+ "transfer_atk_acc": 2.8169014084507045
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 60.56,
136
+ "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':",
137
+ "atk_acc": 47.89,
138
+ "atk_prompt": "Working as an entailment classifier, identify whether the given corpus of verbs displays entailment or not_entailment. act with 'entailment' or 'not_entailment':",
139
+ "transfer_ori_acc": 43.66197183098591,
140
+ "transfer_atk_acc": 19.718309859154928
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 63.5,
146
+ "origin_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 14.5,
148
+ "atk_prompt": "1 the first multiple-choice question on {}, remove the following answer from the options 'A', 'B', 'C', or 'D'. ",
149
+ "transfer_ori_acc": 18.439716312056735,
150
+ "transfer_atk_acc": 31.914893617021278
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 62.0,
156
+ "origin_prompt": "Drawing upon your mastery of {}, please answer the multiple-choice question by selecting the correct option from 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 1.0,
158
+ "atk_prompt": "Drawing upon your mastery of {}, simply question the multiple-selecting question by selecting the wrong color from 'A', 'B', 'C', or 'D'.",
159
+ "transfer_ori_acc": 22.340425531914892,
160
+ "transfer_atk_acc": 17.19858156028369
161
+ }
162
+ ]
results_transfer/chatgpt_vicuna-13b/bertattack_3_shot.json ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 81.0,
6
+ "origin_prompt": "Analyze the provided sentence and classify its grammatical correctness as 'Acceptable' or 'Unacceptable'.",
7
+ "atk_acc": 59.0,
8
+ "atk_prompt": "Analyze the reported speech and classify its inclusive correcting as 'Acceptable' or 'Unacceptable'. ",
9
+ "transfer_ori_acc": 10.9,
10
+ "transfer_atk_acc": 28.299999999999997
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 81.0,
16
+ "origin_prompt": "In the role of a syntax analyzer, examine the grammar of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones.",
17
+ "atk_acc": 75.5,
18
+ "atk_prompt": "using the art of a level analyzeg, examine the grammar of the provided sentence and decide if it is genuine, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones. ",
19
+ "transfer_ori_acc": 3.4000000000000004,
20
+ "transfer_atk_acc": 14.899999999999999
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 74.0,
26
+ "origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 47.5,
28
+ "atk_prompt": "Compare the following sentences and determine if they lack the same aspect marking by respectively with 'equivalent' or 'not_equivalent'. ",
29
+ "transfer_ori_acc": 62.254901960784316,
30
+ "transfer_atk_acc": 58.82352941176471
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 71.5,
36
+ "origin_prompt": "In the role of a textual comparison specialist, examine the given sentences and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 47.0,
38
+ "atk_prompt": "have the role of a textual comparison academic, examine the given sentences and determine if they share the same length, responding with 'equivalent' or 'not_equivalent'. ",
39
+ "transfer_ori_acc": 69.36274509803921,
40
+ "transfer_atk_acc": 67.15686274509804
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 84.0,
46
+ "origin_prompt": "Consider the context and question, and indicate if the answer can be logically deduced from the context by responding with 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 71.0,
48
+ "atk_prompt": "put the contexts and question, and be if the answer can be logically deduced from the question by stating with 'entailment' or 'not_entailment'. ",
49
+ "transfer_ori_acc": 27.500000000000004,
50
+ "transfer_atk_acc": 11.3
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 84.0,
56
+ "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 70.0,
58
+ "atk_prompt": "As a structural inference expert, analyze if the answer to the challenge can be deduced from the query audience and select 'entailment' or 'not_entailment'. ",
59
+ "transfer_ori_acc": 40.300000000000004,
60
+ "transfer_atk_acc": 32.6
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 82.5,
66
+ "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".",
67
+ "atk_acc": 78.5,
68
+ "atk_prompt": "Are the following versus answers equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\". ",
69
+ "transfer_ori_acc": 34.8,
70
+ "transfer_atk_acc": 34.5
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 83.5,
76
+ "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.",
77
+ "atk_acc": 78.0,
78
+ "atk_prompt": "and an instrument for question comparison analysis, consider the questions and establish if their topic is the same, responding with 'equivalent' for matching responses or 'not_equivalent' for between questions. ",
79
+ "transfer_ori_acc": 35.3,
80
+ "transfer_atk_acc": 28.599999999999998
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 85.0,
86
+ "origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.",
87
+ "atk_acc": 59.5,
88
+ "atk_prompt": "write the second statements and categorize their relationship as neither 'entailment' or 'not_entailment'. ",
89
+ "transfer_ori_acc": 50.54151624548736,
90
+ "transfer_atk_acc": 42.59927797833935
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 83.5,
96
+ "origin_prompt": "While performing entailment analysis, classify the relationship between the provided sentences as 'entailment' or 'not_entailment'.",
97
+ "atk_acc": 79.0,
98
+ "atk_prompt": "was performing entailment analysis, classify the relationship between the first phrase as 'entailment' or 'not_entailment'. ",
99
+ "transfer_ori_acc": 32.49097472924188,
100
+ "transfer_atk_acc": 38.26714801444043
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 95.5,
106
+ "origin_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative'.",
107
+ "atk_acc": 94.5,
108
+ "atk_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative' Q47Jmd4lMV. ",
109
+ "transfer_ori_acc": 7.912844036697248,
110
+ "transfer_atk_acc": 0.11467889908256881
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 97.5,
116
+ "origin_prompt": "In the capacity of a sentiment classifier, decide whether the given quote is 'positive' or 'negative'.",
117
+ "atk_acc": 96.5,
118
+ "atk_prompt": "In the capacity of a sentiment classifier, decide whether the given quote is 'positive' or 'negative' 9Q3S1F94fE. ",
119
+ "transfer_ori_acc": 20.18348623853211,
120
+ "transfer_atk_acc": 20.18348623853211
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 69.01,
126
+ "origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 59.15,
128
+ "atk_prompt": "examine the paired actions and categorize their relationship as is 'entailment' or 'not_entailment'. ",
129
+ "transfer_ori_acc": 16.901408450704224,
130
+ "transfer_atk_acc": 8.450704225352112
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 66.2,
136
+ "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.",
137
+ "atk_acc": 56.34,
138
+ "atk_prompt": "In the role of a semantic relationship analyzer, define the connection between the given links and decide if they display entailment or not_entailment. Answer with 'entailment' or 'not_entailment'. ",
139
+ "transfer_ori_acc": 5.633802816901409,
140
+ "transfer_atk_acc": 15.492957746478872
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 66.5,
146
+ "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 11.5,
148
+ "atk_prompt": "at respond to the multiple-independent question about {} by selecting the inappropriate question: 'A', 'B', 'C', or 'D'. ",
149
+ "transfer_ori_acc": 8.865248226950355,
150
+ "transfer_atk_acc": 12.411347517730496
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 65.5,
156
+ "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 52.5,
158
+ "atk_prompt": "all your ability with {}, attend to the following multiple-select question by picking 'A', 'B', 'C', or 'D'.",
159
+ "transfer_ori_acc": 8.156028368794328,
160
+ "transfer_atk_acc": 7.2695035460992905
161
+ }
162
+ ]
results_transfer/chatgpt_vicuna-13b/checklist_0_shot.json ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 78.5,
6
+ "origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable':",
7
+ "atk_acc": 72.5,
8
+ "atk_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable' zexKUTWJYG:",
9
+ "transfer_ori_acc": 28.9,
10
+ "transfer_atk_acc": 22.7
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 79.0,
16
+ "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:",
17
+ "atk_acc": 45.0,
18
+ "atk_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar zexKUTWJYG:",
19
+ "transfer_ori_acc": 27.0,
20
+ "transfer_atk_acc": 38.6
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 75.5,
26
+ "origin_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 72.0,
28
+ "atk_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent' SvAp8RlOFn. ",
29
+ "transfer_ori_acc": 48.77450980392157,
30
+ "transfer_atk_acc": 0.49019607843137253
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 75.5,
36
+ "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 73.5,
38
+ "atk_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent' Kw6nrs57gH. ",
39
+ "transfer_ori_acc": 48.03921568627451,
40
+ "transfer_atk_acc": 56.372549019607845
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 78.0,
46
+ "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 75.5,
48
+ "atk_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment' ItGDrrA1Sc. ",
49
+ "transfer_ori_acc": 26.5,
50
+ "transfer_atk_acc": 16.5
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 78.0,
56
+ "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 74.5,
58
+ "atk_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment' 6a4Yn3RGVc. ",
59
+ "transfer_ori_acc": 13.4,
60
+ "transfer_atk_acc": 11.200000000000001
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 81.5,
66
+ "origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 78.0,
68
+ "atk_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent' iia2yL9Uzm. ",
69
+ "transfer_ori_acc": 30.2,
70
+ "transfer_atk_acc": 26.900000000000002
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 83.0,
76
+ "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.",
77
+ "atk_acc": 80.5,
78
+ "atk_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions Cev0s49fbe. ",
79
+ "transfer_ori_acc": 26.200000000000003,
80
+ "transfer_atk_acc": 24.4
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 82.5,
86
+ "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.",
87
+ "atk_acc": 80.0,
88
+ "atk_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment' MSAddJ4D2a.",
89
+ "transfer_ori_acc": 41.15523465703971,
90
+ "transfer_atk_acc": 20.577617328519857
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 81.5,
96
+ "origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':",
97
+ "atk_acc": 80.0,
98
+ "atk_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment' 6a4Yn3RGVc:",
99
+ "transfer_ori_acc": 24.90974729241877,
100
+ "transfer_atk_acc": 6.859205776173286
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 93.0,
106
+ "origin_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative':",
107
+ "atk_acc": 90.0,
108
+ "atk_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative' AMsRIKZniY: ",
109
+ "transfer_ori_acc": 14.3348623853211,
110
+ "transfer_atk_acc": 6.8807339449541285
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 92.5,
116
+ "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:",
117
+ "atk_acc": 92.0,
118
+ "atk_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify Cev0s49fbe: ",
119
+ "transfer_ori_acc": 38.646788990825684,
120
+ "transfer_atk_acc": 33.48623853211009
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 64.79,
126
+ "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.",
127
+ "atk_acc": 59.15,
128
+ "atk_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word zexKUTWJYG. ",
129
+ "transfer_ori_acc": 25.352112676056336,
130
+ "transfer_atk_acc": 2.8169014084507045
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 63.38,
136
+ "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':",
137
+ "atk_acc": 57.75,
138
+ "atk_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment' 5XeflW1ZJc:",
139
+ "transfer_ori_acc": 43.66197183098591,
140
+ "transfer_atk_acc": 29.577464788732392
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 64.0,
146
+ "origin_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 58.0,
148
+ "atk_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D' Pbl3tYuoRo. ",
149
+ "transfer_ori_acc": 18.439716312056735,
150
+ "transfer_atk_acc": 23.22695035460993
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 61.5,
156
+ "origin_prompt": "As a skilled person in the field of {}, please respond to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 58.0,
158
+ "atk_prompt": "As a skilled person in the field of {}, please respond to the multiple-choice question by choosing 'A', 'B', 'C', or 'D' IhuBIhoPGc.",
159
+ "transfer_ori_acc": 20.0354609929078,
160
+ "transfer_atk_acc": 18.085106382978726
161
+ }
162
+ ]
results_transfer/chatgpt_vicuna-13b/checklist_3_shot.json ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 81.0,
6
+ "origin_prompt": "Examine the sentence and decide if its grammar is 'Acceptable' or 'Unacceptable'.",
7
+ "atk_acc": 78.0,
8
+ "atk_prompt": "Examine the sentence and decide if its grammar is 'Acceptable' or 'Unacceptable' 9Q3S1F94fE. ",
9
+ "transfer_ori_acc": 41.8,
10
+ "transfer_atk_acc": 40.8
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 81.5,
16
+ "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable'.",
17
+ "atk_acc": 79.5,
18
+ "atk_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable' 6a4Yn3RGVc. ",
19
+ "transfer_ori_acc": 23.9,
20
+ "transfer_atk_acc": 13.4
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 74.0,
26
+ "origin_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 68.5,
28
+ "atk_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent' LkF0FZxMZ4. ",
29
+ "transfer_ori_acc": 69.36274509803921,
30
+ "transfer_atk_acc": 64.2156862745098
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 72.0,
36
+ "origin_prompt": "As a linguistic comparator, review the following pair of sentences and determine their semantic equivalence by choosing 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 68.0,
38
+ "atk_prompt": "As a linguistic comparator, review the following pair of sentences and determine their semantic equivalence by choosing 'equivalent' or 'not_equivalent' bhrRSokrfa. ",
39
+ "transfer_ori_acc": 61.51960784313726,
40
+ "transfer_atk_acc": 65.68627450980392
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 84.5,
46
+ "origin_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 80.5,
48
+ "atk_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment' 9Q3S1F94fE. ",
49
+ "transfer_ori_acc": 26.900000000000002,
50
+ "transfer_atk_acc": 10.0
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 84.5,
56
+ "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 81.0,
58
+ "atk_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment' YHQiDfFhw4. ",
59
+ "transfer_ori_acc": 40.300000000000004,
60
+ "transfer_atk_acc": 15.1
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 82.0,
66
+ "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 79.0,
68
+ "atk_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent' xikCjkMydH. ",
69
+ "transfer_ori_acc": 36.3,
70
+ "transfer_atk_acc": 28.1
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 83.0,
76
+ "origin_prompt": "In the capacity of a question assessment system, indicate if the meaning of the provided questions is the same, responding with 'equivalent' or 'not_equivalent'.",
77
+ "atk_acc": 81.5,
78
+ "atk_prompt": "In the capacity of a question assessment system, indicate if the meaning of the provided questions is the same, responding with 'equivalent' or 'not_equivalent' 5XeflW1ZJc. ",
79
+ "transfer_ori_acc": 46.9,
80
+ "transfer_atk_acc": 31.4
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 84.5,
86
+ "origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.",
87
+ "atk_acc": 80.5,
88
+ "atk_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment' 5XeflW1ZJc. ",
89
+ "transfer_ori_acc": 50.54151624548736,
90
+ "transfer_atk_acc": 48.375451263537904
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 84.0,
96
+ "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.",
97
+ "atk_acc": 80.5,
98
+ "atk_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment' bhrRSokrfa. ",
99
+ "transfer_ori_acc": 46.931407942238266,
100
+ "transfer_atk_acc": 45.48736462093863
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 96.0,
106
+ "origin_prompt": "Read the provided excerpt and choose between 'positive' and 'negative' to describe its sentiment.",
107
+ "atk_acc": 77.5,
108
+ "atk_prompt": "go the read excerpt and be between 'positive' and 'negative' to describing its mood. ",
109
+ "transfer_ori_acc": 1.261467889908257,
110
+ "transfer_atk_acc": 2.637614678899083
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 97.5,
116
+ "origin_prompt": "In the capacity of a sentiment classifier, decide whether the given quote is 'positive' or 'negative'.",
117
+ "atk_acc": 13.5,
118
+ "atk_prompt": "in the capacity of a sentiment classizer, chose neither the said quotes is 'positive' or 'negative'. ",
119
+ "transfer_ori_acc": 20.18348623853211,
120
+ "transfer_atk_acc": 21.788990825688074
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 69.01,
126
+ "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 61.97,
128
+ "atk_prompt": "Indicate if the connection between the followping senZences is 'entailment' or 'not_entailment'. ",
129
+ "transfer_ori_acc": 9.859154929577464,
130
+ "transfer_atk_acc": 9.859154929577464
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 66.2,
136
+ "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.",
137
+ "atk_acc": 57.75,
138
+ "atk_prompt": "In your role as an entailment analysis tool, asress the rlationship between the given sentences and classify it as 'entailment' or 'not_entailment'. ",
139
+ "transfer_ori_acc": 23.943661971830984,
140
+ "transfer_atk_acc": 18.30985915492958
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 66.0,
146
+ "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 63.0,
148
+ "atk_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D' MQnugHcaoy. ",
149
+ "transfer_ori_acc": 8.865248226950355,
150
+ "transfer_atk_acc": 4.787234042553192
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 65.5,
156
+ "origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 63.0,
158
+ "atk_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D' mOJ9nEwT4f.",
159
+ "transfer_ori_acc": 8.51063829787234,
160
+ "transfer_atk_acc": 9.042553191489363
161
+ }
162
+ ]
results_transfer/chatgpt_vicuna-13b/deepwordbug_0_shot.json ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 79.0,
6
+ "origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable':",
7
+ "atk_acc": 74.5,
8
+ "atk_prompt": "Review the sentece below and identify whether its grammar is 'Acceptable' or 'Unacceptable':",
9
+ "transfer_ori_acc": 28.9,
10
+ "transfer_atk_acc": 42.0
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 79.5,
16
+ "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:",
17
+ "atk_acc": 0.0,
18
+ "atk_prompt": "nhile performing grammr analysis, classify the rgammar of the fllowing sentenZe as 'acceptable' for corFect Xrammar or 'unacceptable' for incorrect gramUar:",
19
+ "transfer_ori_acc": 27.0,
20
+ "transfer_atk_acc": 42.5
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 74.0,
26
+ "origin_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 70.0,
28
+ "atk_prompt": "Evaluate whether the two provRided sentnces convey the same meaning by answering 'equivalent' or 'not_equivalent'. ",
29
+ "transfer_ori_acc": 48.77450980392157,
30
+ "transfer_atk_acc": 29.411764705882355
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 76.0,
36
+ "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 53.0,
38
+ "atk_prompt": "nI your capacity as a sematnic asseJssment tol, evaluate the prvoided senRtences and classify their meainngs as 'equivalent' or 'not_equivalent'. ",
39
+ "transfer_ori_acc": 48.03921568627451,
40
+ "transfer_atk_acc": 27.696078431372552
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 78.0,
46
+ "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 50.0,
48
+ "atk_prompt": "Exaluate whetxher the iven contet Supports the anwser to the queston by respEonding with 'entailment' or 'not_entailment'. ",
49
+ "transfer_ori_acc": 26.5,
50
+ "transfer_atk_acc": 29.7
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 79.5,
56
+ "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 19.5,
58
+ "atk_prompt": "As a textula inference expert, aInalyze if the answer to the question can be deducud from the provided context and Melect 'entailment' or 'not_entailment'. ",
59
+ "transfer_ori_acc": 32.5,
60
+ "transfer_atk_acc": 32.0
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 82.0,
66
+ "origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 45.0,
68
+ "atk_prompt": "Assess whethdr the followng sLtatements are identocal in meuaning by answerinJ 'equivalent' or 'not_equivalent'. ",
69
+ "transfer_ori_acc": 30.2,
70
+ "transfer_atk_acc": 23.0
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 82.5,
76
+ "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.",
77
+ "atk_acc": 79.5,
78
+ "atk_prompt": "Ao an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions. ",
79
+ "transfer_ori_acc": 26.200000000000003,
80
+ "transfer_atk_acc": 12.2
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 83.5,
86
+ "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.",
87
+ "atk_acc": 79.0,
88
+ "atk_prompt": "ArYe the folowing two sentenes entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word. ",
89
+ "transfer_ori_acc": 27.075812274368232,
90
+ "transfer_atk_acc": 7.581227436823104
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 80.5,
96
+ "origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':",
97
+ "atk_acc": 78.5,
98
+ "atk_prompt": "IAs an instrument for entailment evaluatioRn, consider the otwo sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':",
99
+ "transfer_ori_acc": 24.90974729241877,
100
+ "transfer_atk_acc": 32.851985559566785
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 93.0,
106
+ "origin_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative':",
107
+ "atk_acc": 88.5,
108
+ "atk_prompt": "Deetrmine the overall sentLment of this sentence, categorizing it as 'positive' or 'negative': ",
109
+ "transfer_ori_acc": 14.3348623853211,
110
+ "transfer_atk_acc": 6.422018348623854
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 93.0,
116
+ "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:",
117
+ "atk_acc": 85.0,
118
+ "atk_prompt": "uFnctioning as a sentmient identification tool, assess if the folGowing expression is 'positive' or 'negative'. Classify: ",
119
+ "transfer_ori_acc": 38.646788990825684,
120
+ "transfer_atk_acc": 30.84862385321101
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 66.2,
126
+ "origin_prompt": "Examine the pair of sentences and determine if they exhibit entailment or not_entailment. Answer with either 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 53.52,
128
+ "atk_prompt": "Examine the Bair of snetences and determine if they exhibit entailment or not_entailment. Ancwer with eMther 'entailment' or 'not_entailment'.",
129
+ "transfer_ori_acc": 33.80281690140845,
130
+ "transfer_atk_acc": 25.352112676056336
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 63.38,
136
+ "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':",
137
+ "atk_acc": 53.52,
138
+ "atk_prompt": "IQn the role of a semantic relationship analyzer, eamine the connection between the given sentences and decide if they exhbit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':",
139
+ "transfer_ori_acc": 43.66197183098591,
140
+ "transfer_atk_acc": 43.66197183098591
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 62.5,
146
+ "origin_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 60.0,
148
+ "atk_prompt": "Regarding the following multiple-choice question on {}, pick the correct answeWr from the options 'A', 'B', 'C', or 'D'. ",
149
+ "transfer_ori_acc": 18.439716312056735,
150
+ "transfer_atk_acc": 22.5177304964539
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 62.5,
156
+ "origin_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 59.0,
158
+ "atk_prompt": "With your knowledge of {}, tackle the follQwing multiple-choice question by choosing 'A', 'B', 'C', or 'D'.",
159
+ "transfer_ori_acc": 22.872340425531913,
160
+ "transfer_atk_acc": 23.93617021276596
161
+ }
162
+ ]
results_transfer/chatgpt_vicuna-13b/deepwordbug_3_shot.json ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 80.5,
6
+ "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable'.",
7
+ "atk_acc": 78.5,
8
+ "atk_prompt": "Examine the given sentence and decide if it is gramatically sound. nAswer with either 'Acceptable' or 'Unacceptable'. ",
9
+ "transfer_ori_acc": 0.8999999999999999,
10
+ "transfer_atk_acc": 0.4
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 81.5,
16
+ "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable'.",
17
+ "atk_acc": 78.5,
18
+ "atk_prompt": "Funtioning as a graMmar evaluation tool, analyzY the given sentence and decide if it is grammatically corLect, responding with 'acceptable' or 'unacceptable'. ",
19
+ "transfer_ori_acc": 23.9,
20
+ "transfer_atk_acc": 53.7
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 74.5,
26
+ "origin_prompt": "Examine the pair of sentences and decide if their meanings are identical by answering with 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 65.5,
28
+ "atk_prompt": "Examine the Kair of sentences and edcide if their meanijngs are ibentical by answering with 'equivalent' or 'not_equivalent'. ",
29
+ "transfer_ori_acc": 55.63725490196079,
30
+ "transfer_atk_acc": 68.87254901960785
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 72.0,
36
+ "origin_prompt": "As a linguistic comparator, review the following pair of sentences and determine their semantic equivalence by choosing 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 66.5,
38
+ "atk_prompt": "AV a linguistic comaprator, review the following pair of sentences and determine their semayntic equivalence by chhoosing 'equivalent' or 'not_equivalent'. ",
39
+ "transfer_ori_acc": 61.51960784313726,
40
+ "transfer_atk_acc": 64.2156862745098
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 85.0,
46
+ "origin_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 77.5,
48
+ "atk_prompt": "Examine the contedt and question, and determine if the cXontext logically implies the answer by selecting 'entailment' or 'not_entailment'. ",
49
+ "transfer_ori_acc": 26.900000000000002,
50
+ "transfer_atk_acc": 34.300000000000004
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 86.0,
56
+ "origin_prompt": "As a textual analyst, examine if the given context logically implies the answer to the question and indicate your decision with 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 75.0,
58
+ "atk_prompt": "A a textual analyst, xamine if the gPven conSext logiclaly implies the asnwer to the qbuestion and indicate your decision with 'entailment' or 'not_entailment'. ",
59
+ "transfer_ori_acc": 31.4,
60
+ "transfer_atk_acc": 43.6
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 82.0,
66
+ "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 80.0,
68
+ "atk_prompt": "Can these two statements be considJred equal in meaning? Answer with 'equivalent' or 'not_equivalent'. ",
69
+ "transfer_ori_acc": 36.3,
70
+ "transfer_atk_acc": 29.7
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 83.0,
76
+ "origin_prompt": "Acting as a question equivalence instrument, determine if the provided questions are equivalent in meaning, answering with 'equivalent' for similar questions or 'not_equivalent' for dissimilar ones.",
77
+ "atk_acc": 81.5,
78
+ "atk_prompt": "Actinm as a question equivalence instrumetn, determine if the provided questions are equivalent in meaning, answering with 'equivalent' for similar questions or 'not_equivalent' for dissimilar ones. ",
79
+ "transfer_ori_acc": 34.599999999999994,
80
+ "transfer_atk_acc": 30.5
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 85.0,
86
+ "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.",
87
+ "atk_acc": 80.5,
88
+ "atk_prompt": "Does the relationshBp between the given sentences repersent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'. ",
89
+ "transfer_ori_acc": 46.20938628158845,
90
+ "transfer_atk_acc": 45.84837545126354
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 84.5,
96
+ "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.",
97
+ "atk_acc": 81.0,
98
+ "atk_prompt": "In your role as an entailment analysis toIol, aZssess the relationship between the given sentences and cassify it as 'entailment' or 'not_entailment'. ",
99
+ "transfer_ori_acc": 46.931407942238266,
100
+ "transfer_atk_acc": 40.794223826714806
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 97.0,
106
+ "origin_prompt": "Evaluate the sentiment of the given text and classify it as 'positive' or 'negative'.",
107
+ "atk_acc": 94.0,
108
+ "atk_prompt": "ENvaluate the senttment of the giOven text and classify it as 'positive' or 'negative'. ",
109
+ "transfer_ori_acc": 10.435779816513762,
110
+ "transfer_atk_acc": 2.408256880733945
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 97.0,
116
+ "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'.",
117
+ "atk_acc": 94.0,
118
+ "atk_prompt": "qAs a setniment classifier, determnne whether the following text is 'positive' or 'negative'. ",
119
+ "transfer_ori_acc": 15.36697247706422,
120
+ "transfer_atk_acc": 7.912844036697248
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 67.61,
126
+ "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 64.79,
128
+ "atk_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ",
129
+ "transfer_ori_acc": 9.859154929577464,
130
+ "transfer_atk_acc": 0.0
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 64.79,
136
+ "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.",
137
+ "atk_acc": 59.15,
138
+ "atk_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ",
139
+ "transfer_ori_acc": 23.943661971830984,
140
+ "transfer_atk_acc": 11.267605633802818
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 66.5,
146
+ "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 63.5,
148
+ "atk_prompt": "ISn ralation to the mulitple-choice question on {}, pleare provide the accurate answer by choosing 'A', 'B', 'C', or 'D'. ",
149
+ "transfer_ori_acc": 9.397163120567376,
150
+ "transfer_atk_acc": 5.141843971631205
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 65.5,
156
+ "origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 62.5,
158
+ "atk_prompt": "As someone well-versed in {}, please address the mulZtiple-choice question below by selecting 'A', 'B', 'C', or 'D'.",
159
+ "transfer_ori_acc": 8.51063829787234,
160
+ "transfer_atk_acc": 11.702127659574469
161
+ }
162
+ ]
results_transfer/chatgpt_vicuna-13b/stresstest_0_shot.json ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 78.5,
6
+ "origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable':",
7
+ "atk_acc": 32.5,
8
+ "atk_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable' and true is true :",
9
+ "transfer_ori_acc": 28.9,
10
+ "transfer_atk_acc": 42.0
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 79.5,
16
+ "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:",
17
+ "atk_acc": 75.0,
18
+ "atk_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar and true is true and true is true and true is true and true is true and true is true :",
19
+ "transfer_ori_acc": 27.0,
20
+ "transfer_atk_acc": 10.299999999999999
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 77.5,
26
+ "origin_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 73.0,
28
+ "atk_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent' and false is not true . ",
29
+ "transfer_ori_acc": 53.431372549019606,
30
+ "transfer_atk_acc": 52.450980392156865
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 76.0,
36
+ "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 15.5,
38
+ "atk_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent' and true is true . ",
39
+ "transfer_ori_acc": 48.03921568627451,
40
+ "transfer_atk_acc": 62.745098039215684
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 78.5,
46
+ "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 74.5,
48
+ "atk_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment' and true is true . ",
49
+ "transfer_ori_acc": 26.5,
50
+ "transfer_atk_acc": 37.0
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 79.5,
56
+ "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 69.0,
58
+ "atk_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ",
59
+ "transfer_ori_acc": 32.5,
60
+ "transfer_atk_acc": 29.599999999999998
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 80.0,
66
+ "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 72.5,
68
+ "atk_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent' and false is not true . ",
69
+ "transfer_ori_acc": 22.900000000000002,
70
+ "transfer_atk_acc": 28.599999999999998
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 81.0,
76
+ "origin_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.",
77
+ "atk_acc": 80.5,
78
+ "atk_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ",
79
+ "transfer_ori_acc": 14.6,
80
+ "transfer_atk_acc": 23.400000000000002
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 83.5,
86
+ "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.",
87
+ "atk_acc": 81.5,
88
+ "atk_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word and true is true . ",
89
+ "transfer_ori_acc": 27.075812274368232,
90
+ "transfer_atk_acc": 7.2202166064981945
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 81.5,
96
+ "origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':",
97
+ "atk_acc": 80.5,
98
+ "atk_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true :",
99
+ "transfer_ori_acc": 24.90974729241877,
100
+ "transfer_atk_acc": 37.18411552346571
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 91.0,
106
+ "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment:",
107
+ "atk_acc": 0.0,
108
+ "atk_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment and true is true : ",
109
+ "transfer_ori_acc": 42.54587155963303,
110
+ "transfer_atk_acc": 49.77064220183486
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 93.5,
116
+ "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:",
117
+ "atk_acc": 93.0,
118
+ "atk_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify and false is not true : ",
119
+ "transfer_ori_acc": 38.646788990825684,
120
+ "transfer_atk_acc": 36.353211009174316
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 61.97,
126
+ "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.",
127
+ "atk_acc": 63.38,
128
+ "atk_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word and true is true . ",
129
+ "transfer_ori_acc": 25.352112676056336,
130
+ "transfer_atk_acc": 4.225352112676056
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 63.38,
136
+ "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':",
137
+ "atk_acc": 59.15,
138
+ "atk_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true :",
139
+ "transfer_ori_acc": 43.66197183098591,
140
+ "transfer_atk_acc": 30.985915492957744
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 63.0,
146
+ "origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 53.5,
148
+ "atk_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D' and true is true . ",
149
+ "transfer_ori_acc": 22.69503546099291,
150
+ "transfer_atk_acc": 27.127659574468083
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 61.5,
156
+ "origin_prompt": "As a skilled person in the field of {}, please respond to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 57.0,
158
+ "atk_prompt": "As a skilled person in the field of {}, please respond to the multiple-choice question by choosing 'A', 'B', 'C', or 'D' and false is not true .",
159
+ "transfer_ori_acc": 20.0354609929078,
160
+ "transfer_atk_acc": 20.212765957446805
161
+ }
162
+ ]
results_transfer/chatgpt_vicuna-13b/stresstest_3_shot.json ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 80.5,
6
+ "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable'.",
7
+ "atk_acc": 79.5,
8
+ "atk_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable' and true is true and true is true and true is true and true is true and true is true . ",
9
+ "transfer_ori_acc": 0.8999999999999999,
10
+ "transfer_atk_acc": 0.4
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 81.5,
16
+ "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable'.",
17
+ "atk_acc": 80.5,
18
+ "atk_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable' and false is not true . ",
19
+ "transfer_ori_acc": 23.9,
20
+ "transfer_atk_acc": 9.4
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 73.5,
26
+ "origin_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 71.5,
28
+ "atk_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ",
29
+ "transfer_ori_acc": 69.36274509803921,
30
+ "transfer_atk_acc": 69.11764705882352
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 72.5,
36
+ "origin_prompt": "In the role of a textual comparison specialist, examine the given sentences and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 70.0,
38
+ "atk_prompt": "In the role of a textual comparison specialist, examine the given sentences and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent' and true is true . ",
39
+ "transfer_ori_acc": 69.36274509803921,
40
+ "transfer_atk_acc": 63.48039215686274
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 85.0,
46
+ "origin_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 79.5,
48
+ "atk_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ",
49
+ "transfer_ori_acc": 26.900000000000002,
50
+ "transfer_atk_acc": 11.3
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 84.0,
56
+ "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 81.5,
58
+ "atk_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment' and false is not true . ",
59
+ "transfer_ori_acc": 41.6,
60
+ "transfer_atk_acc": 31.7
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 81.5,
66
+ "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 81.0,
68
+ "atk_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent' and false is not true . ",
69
+ "transfer_ori_acc": 36.3,
70
+ "transfer_atk_acc": 41.699999999999996
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 83.0,
76
+ "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.",
77
+ "atk_acc": 82.0,
78
+ "atk_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions and true is true and true is true and true is true and true is true and true is true . ",
79
+ "transfer_ori_acc": 35.3,
80
+ "transfer_atk_acc": 40.2
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 85.0,
86
+ "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.",
87
+ "atk_acc": 82.0,
88
+ "atk_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment' and false is not true . ",
89
+ "transfer_ori_acc": 46.20938628158845,
90
+ "transfer_atk_acc": 48.014440433213
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 83.0,
96
+ "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment'.",
97
+ "atk_acc": 83.0,
98
+ "atk_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment' and false is not true . ",
99
+ "transfer_ori_acc": 46.20938628158845,
100
+ "transfer_atk_acc": 41.15523465703971
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 97.0,
106
+ "origin_prompt": "Evaluate the sentiment of the given text and classify it as 'positive' or 'negative'.",
107
+ "atk_acc": 95.5,
108
+ "atk_prompt": "Evaluate the sentiment of the given text and classify it as 'positive' or 'negative' and true is true . ",
109
+ "transfer_ori_acc": 10.435779816513762,
110
+ "transfer_atk_acc": 1.94954128440367
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 97.0,
116
+ "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'.",
117
+ "atk_acc": 96.0,
118
+ "atk_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative' and false is not true . ",
119
+ "transfer_ori_acc": 15.36697247706422,
120
+ "transfer_atk_acc": 8.486238532110093
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 67.61,
126
+ "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 60.56,
128
+ "atk_prompt": "Indicate if the connection between the later sentences is 'entailment' or 'not_entailment'. ",
129
+ "transfer_ori_acc": 9.859154929577464,
130
+ "transfer_atk_acc": 9.859154929577464
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 66.2,
136
+ "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.",
137
+ "atk_acc": 59.15,
138
+ "atk_prompt": "In the role of a semantic relationship analyzer, investigated the linking between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'. ",
139
+ "transfer_ori_acc": 5.633802816901409,
140
+ "transfer_atk_acc": 12.676056338028168
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 66.5,
146
+ "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 61.5,
148
+ "atk_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D' and false is not true . ",
149
+ "transfer_ori_acc": 8.865248226950355,
150
+ "transfer_atk_acc": 11.52482269503546
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 65.5,
156
+ "origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 59.5,
158
+ "atk_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D' and false is not true .",
159
+ "transfer_ori_acc": 8.51063829787234,
160
+ "transfer_atk_acc": 15.602836879432624
161
+ }
162
+ ]
results_transfer/chatgpt_vicuna-13b/textbugger_0_shot.json ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 77.5,
6
+ "origin_prompt": "Assess the grammatical structure of the given sentence and classify it as 'Acceptable' or 'Unacceptable':",
7
+ "atk_acc": 6.5,
8
+ "atk_prompt": "As sess the garmmatical structure of the conferred sentence and classify it as 'Acceptable' or 'Unacceptable':",
9
+ "transfer_ori_acc": 50.5,
10
+ "transfer_atk_acc": 49.5
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 79.5,
16
+ "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:",
17
+ "atk_acc": 9.5,
18
+ "atk_prompt": "While prforming grammar analyzing, classify the grammar of the follows sentеnce as 'acceptable' for correcting gammar or 'unacceptable' for incorrect ɡrammar:",
19
+ "transfer_ori_acc": 27.0,
20
+ "transfer_atk_acc": 32.2
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 75.0,
26
+ "origin_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 67.5,
28
+ "atk_prompt": "Evaluate wհether the both provided sentences convey the same meaninɡ by answeriոg 'equivalent' or 'not_equivalent'. ",
29
+ "transfer_ori_acc": 48.77450980392157,
30
+ "transfer_atk_acc": 26.47058823529412
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 75.0,
36
+ "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 9.0,
38
+ "atk_prompt": "In your capacities as a semɑntic assessment tool, evaluate the provdied penalties and classify their meaning as 'equivalent' or 'not_equivalent'. ",
39
+ "transfer_ori_acc": 48.03921568627451,
40
+ "transfer_atk_acc": 35.53921568627451
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 77.0,
46
+ "origin_prompt": "Consider the context and question, and indicate if the answer can be logically deduced from the context by responding with 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 57.0,
48
+ "atk_prompt": "Considеr the context and question, and idicate if the respond can be logically ԁeduced from the context by ⲅesponding with 'entailment' or 'not_entailment'. ",
49
+ "transfer_ori_acc": 32.9,
50
+ "transfer_atk_acc": 5.800000000000001
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 78.5,
56
+ "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 20.0,
58
+ "atk_prompt": "Because a textual inference expert, analysing if the answeⲅ to the question can be deduced from the providеd contex𝚝 and choosing 'entailment' or 'not_entailment'. ",
59
+ "transfer_ori_acc": 32.5,
60
+ "transfer_atk_acc": 2.9000000000000004
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 81.5,
66
+ "origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 44.5,
68
+ "atk_prompt": "Assess whether the following statements are analogous in maning by answeⲅing 'equivalent' or 'not_equivalent'. ",
69
+ "transfer_ori_acc": 30.2,
70
+ "transfer_atk_acc": 13.200000000000001
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 83.0,
76
+ "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.",
77
+ "atk_acc": 77.5,
78
+ "atk_prompt": "As an instrument for problem comparison evɑluation, consider the ques𝚝ions and determine if their meaning is the same, respond with 'equivalent' for similar questiоns or 'not_equivalent' for different questions. ",
79
+ "transfer_ori_acc": 26.200000000000003,
80
+ "transfer_atk_acc": 7.3
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 83.0,
86
+ "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.",
87
+ "atk_acc": 78.5,
88
+ "atk_prompt": "Be the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word. ",
89
+ "transfer_ori_acc": 27.075812274368232,
90
+ "transfer_atk_acc": 3.9711191335740073
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 81.0,
96
+ "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':",
97
+ "atk_acc": 74.5,
98
+ "atk_prompt": "Working as an entailment classifier, detecting if the given torque of sentences displays entailment or not_entailment. Answering with 'entailment' or 'not_entailment':",
99
+ "transfer_ori_acc": 49.81949458483754,
100
+ "transfer_atk_acc": 25.63176895306859
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 90.0,
106
+ "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment:",
107
+ "atk_acc": 82.5,
108
+ "atk_prompt": "Review this speeches and decide if it has a 'positive' or 'negative' sentiment: ",
109
+ "transfer_ori_acc": 42.54587155963303,
110
+ "transfer_atk_acc": 27.75229357798165
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 93.5,
116
+ "origin_prompt": "Acting as a sentiment evaluator, identify if the given sentence is 'positive' or 'negative'. Classify:",
117
+ "atk_acc": 86.0,
118
+ "atk_prompt": "Atcing as a sentiemnt evalua𝚝or, detecting if the given sentence is 'positive' or 'negative'. Classify: ",
119
+ "transfer_ori_acc": 21.90366972477064,
120
+ "transfer_atk_acc": 26.949541284403672
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 67.61,
126
+ "origin_prompt": "Examine the pair of sentences and determine if they exhibit entailment or not_entailment. Answer with either 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 52.11,
128
+ "atk_prompt": "Examine the paiⲅ of sentences and determines if they exhibit entailment or not_entailment. Answer with neither 'entailment' or 'not_entailment'.",
129
+ "transfer_ori_acc": 33.80281690140845,
130
+ "transfer_atk_acc": 0.0
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 60.56,
136
+ "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':",
137
+ "atk_acc": 52.11,
138
+ "atk_prompt": "Working as an entailment classifier, detects whether the given pair of sentences displays entailment or not_entailment. Answering with 'entailment' or 'not_entailment':",
139
+ "transfer_ori_acc": 43.66197183098591,
140
+ "transfer_atk_acc": 35.2112676056338
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 62.5,
146
+ "origin_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 51.0,
148
+ "atk_prompt": "Regarding the hereafter multiple-choice question on {}, piϲk the corrects answers from the options 'A', 'B', 'C', or 'D'. ",
149
+ "transfer_ori_acc": 18.439716312056735,
150
+ "transfer_atk_acc": 26.95035460992908
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 62.5,
156
+ "origin_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 57.5,
158
+ "atk_prompt": "Witհ your knowledge of {}, confront the following multipⅼe-choice question by select 'A', 'B', 'C', or 'D'.",
159
+ "transfer_ori_acc": 22.872340425531913,
160
+ "transfer_atk_acc": 27.482269503546096
161
+ }
162
+ ]
results_transfer/chatgpt_vicuna-13b/textbugger_3_shot.json ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 80.5,
6
+ "origin_prompt": "Analyze the provided sentence and classify its grammatical correctness as 'Acceptable' or 'Unacceptable'.",
7
+ "atk_acc": 67.5,
8
+ "atk_prompt": "Analysing the provided sentences and classify its pronouns correct as 'Acceptable' or 'Unacceptable'. ",
9
+ "transfer_ori_acc": 10.9,
10
+ "transfer_atk_acc": 54.7
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 81.0,
16
+ "origin_prompt": "In the role of a syntax analyzer, examine the grammar of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones.",
17
+ "atk_acc": 77.5,
18
+ "atk_prompt": "In the role of a syntax checker, examine the grammɑr of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentence or 'unacceptable' for incorrect ones. ",
19
+ "transfer_ori_acc": 3.4000000000000004,
20
+ "transfer_atk_acc": 10.299999999999999
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 73.0,
26
+ "origin_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 68.0,
28
+ "atk_prompt": "Ass ess if the two gien sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent'. ",
29
+ "transfer_ori_acc": 69.36274509803921,
30
+ "transfer_atk_acc": 70.34313725490196
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 71.5,
36
+ "origin_prompt": "As a linguistic comparator, review the following pair of sentences and determine their semantic equivalence by choosing 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 64.5,
38
+ "atk_prompt": "As a linguistic comparator, review the following torque of sentences and determine their semantiϲ eԛuivalence by choosing 'equivalent' or 'not_equivalent'. ",
39
+ "transfer_ori_acc": 61.51960784313726,
40
+ "transfer_atk_acc": 67.15686274509804
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 85.0,
46
+ "origin_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 76.0,
48
+ "atk_prompt": "Examine the cntext and question, and determine if the backdrop logically implies the answer by selecting 'entailment' or 'not_entailment'. ",
49
+ "transfer_ori_acc": 26.900000000000002,
50
+ "transfer_atk_acc": 42.0
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 83.0,
56
+ "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 73.5,
58
+ "atk_prompt": "Aѕ a semantic researcher, evaluate si the gave framework supports the respond to the question and choose 'entailment' or 'not_entailment'. ",
59
+ "transfer_ori_acc": 41.6,
60
+ "transfer_atk_acc": 40.1
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 82.5,
66
+ "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 80.0,
68
+ "atk_prompt": "Can these two st atements be considered equal in meaning? Ansԝer with 'equivalent' or 'not_equivalent'. ",
69
+ "transfer_ori_acc": 36.3,
70
+ "transfer_atk_acc": 31.2
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 82.5,
76
+ "origin_prompt": "In the capacity of a question assessment system, indicate if the meaning of the provided questions is the same, responding with 'equivalent' or 'not_equivalent'.",
77
+ "atk_acc": 80.5,
78
+ "atk_prompt": "In the abilities of a question ɑssessment system, indicate if the meaning of the provided matters is the same, responding with 'equivalent' or 'not_equivalent'. ",
79
+ "transfer_ori_acc": 46.9,
80
+ "transfer_atk_acc": 40.5
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 84.0,
86
+ "origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.",
87
+ "atk_acc": 76.5,
88
+ "atk_prompt": "Review the two declarations and categorize their relationship as neither 'entailment' or 'not_entailment'. ",
89
+ "transfer_ori_acc": 50.54151624548736,
90
+ "transfer_atk_acc": 45.84837545126354
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 83.5,
96
+ "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment'.",
97
+ "atk_acc": 80.0,
98
+ "atk_prompt": "As an entailment identification system, eamine the connection between the following sentences and reѕpond with 'entailment' or 'not_entailment'. ",
99
+ "transfer_ori_acc": 46.20938628158845,
100
+ "transfer_atk_acc": 47.292418772563174
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 95.5,
106
+ "origin_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative'.",
107
+ "atk_acc": 74.0,
108
+ "atk_prompt": "Ater examining the following expression, etiquette its emotion as nor 'positive' or 'negative'. ",
109
+ "transfer_ori_acc": 7.912844036697248,
110
+ "transfer_atk_acc": 5.045871559633028
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 97.5,
116
+ "origin_prompt": "In the capacity of a sentiment classifier, decide whether the given quote is 'positive' or 'negative'.",
117
+ "atk_acc": 93.5,
118
+ "atk_prompt": "Onto the capacity of a feeling classifeir, decide si the given quote is 'positive' or 'negative'. ",
119
+ "transfer_ori_acc": 20.18348623853211,
120
+ "transfer_atk_acc": 0.8027522935779817
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 69.01,
126
+ "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 61.97,
128
+ "atk_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment' Vq3aBzuZcD. ",
129
+ "transfer_ori_acc": 9.859154929577464,
130
+ "transfer_atk_acc": 18.30985915492958
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 66.2,
136
+ "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.",
137
+ "atk_acc": 59.15,
138
+ "atk_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment' is77sOXAu8. ",
139
+ "transfer_ori_acc": 23.943661971830984,
140
+ "transfer_atk_acc": 8.450704225352112
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 67.5,
146
+ "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 61.5,
148
+ "atk_prompt": "At relationship to the mul𝚝iple-choice question on {}, please proviԁe the accurate responding by choosing 'A', 'B', 'C', or 'D'. ",
149
+ "transfer_ori_acc": 9.397163120567376,
150
+ "transfer_atk_acc": 4.432624113475177
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 65.5,
156
+ "origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 61.5,
158
+ "atk_prompt": "As someone well-vesred in {}, please address the muⅼtiple-choice questioո below by selecting 'A', 'B', 'C', or 'D'.",
159
+ "transfer_ori_acc": 8.51063829787234,
160
+ "transfer_atk_acc": 10.815602836879433
161
+ }
162
+ ]
results_transfer/chatgpt_vicuna-13b/textfooler_0_shot.json ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 79.0,
6
+ "origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable':",
7
+ "atk_acc": 2.0,
8
+ "atk_prompt": "Revising the punishments below and locate whether its dictionary is 'Acceptable' or 'Unacceptable':",
9
+ "transfer_ori_acc": 28.9,
10
+ "transfer_atk_acc": 33.4
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 80.0,
16
+ "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:",
17
+ "atk_acc": 6.0,
18
+ "atk_prompt": "While performing jargon discusses, categories the language of the below sentence as 'acceptable' for mend jargon or 'unacceptable' for unfit grammar:",
19
+ "transfer_ori_acc": 27.0,
20
+ "transfer_atk_acc": 39.1
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 75.0,
26
+ "origin_prompt": "Examine the pair of sentences and decide if their meanings are identical by answering with 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 68.5,
28
+ "atk_prompt": "Examine the pair of sentences and selects if their regard are identical by responding with 'equivalent' or 'not_equivalent'. ",
29
+ "transfer_ori_acc": 64.2156862745098,
30
+ "transfer_atk_acc": 43.872549019607845
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 75.5,
36
+ "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 0.0,
38
+ "atk_prompt": "About your expertise as a semantic appraisal means, evaluate the provided penalty and classify their meanings as 'equivalent' or 'not_equivalent'. ",
39
+ "transfer_ori_acc": 48.03921568627451,
40
+ "transfer_atk_acc": 22.54901960784314
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 77.5,
46
+ "origin_prompt": "Consider the context and question, and indicate if the answer can be logically deduced from the context by responding with 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 0.5,
48
+ "atk_prompt": "Recital the context and items, and specify if the solution can be logically subtracted from the framework by reactions with 'entailment' or 'not_entailment'. ",
49
+ "transfer_ori_acc": 32.9,
50
+ "transfer_atk_acc": 4.0
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 78.0,
56
+ "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 0.0,
58
+ "atk_prompt": "As a semantic fellows, measuring whether the rendered background supports the answers to the item and chosen 'entailment' or 'not_entailment'. ",
59
+ "transfer_ori_acc": 13.4,
60
+ "transfer_atk_acc": 6.6000000000000005
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 80.5,
66
+ "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 0.0,
68
+ "atk_prompt": "Kan these two statement is studied same in sens? Reaction with 'equivalent' or 'not_equivalent'. ",
69
+ "transfer_ori_acc": 22.900000000000002,
70
+ "transfer_atk_acc": 7.3999999999999995
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 80.5,
76
+ "origin_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.",
77
+ "atk_acc": 76.5,
78
+ "atk_prompt": "Functioning as a question similarity evaluation tool, analyse the given questions and decide if they share the same feel, responding with 'equivalent' or 'not_equivalent'. ",
79
+ "transfer_ori_acc": 14.6,
80
+ "transfer_atk_acc": 20.8
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 83.5,
86
+ "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.",
87
+ "atk_acc": 60.5,
88
+ "atk_prompt": "Pose the aftermath two phrases entailment or not_entailment? Reactions me with \"entailment\" or \"not_entailment\", just one adjectives. ",
89
+ "transfer_ori_acc": 27.075812274368232,
90
+ "transfer_atk_acc": 36.462093862815884
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 81.0,
96
+ "origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':",
97
+ "atk_acc": 60.0,
98
+ "atk_prompt": "Because an devices for entailment appraised, revisit the two phrases and identifying if their correlate is 'entailment' or 'not_entailment'. Answered with 'entailment' or 'not_entailment':",
99
+ "transfer_ori_acc": 24.90974729241877,
100
+ "transfer_atk_acc": 9.747292418772563
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 90.5,
106
+ "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment:",
107
+ "atk_acc": 0.0,
108
+ "atk_prompt": "Analysis this assertion and defining whether it ai a 'positive' or 'negative' sentiment: ",
109
+ "transfer_ori_acc": 42.54587155963303,
110
+ "transfer_atk_acc": 35.091743119266056
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 94.0,
116
+ "origin_prompt": "Emulating a sentiment classification system, indicate whether the provided text is 'positive' or 'negative'. Classify:",
117
+ "atk_acc": 4.5,
118
+ "atk_prompt": "Emulating a foreboding classifies regime, explain whether the stipulated laws is 'positive' or 'negative'. Grading: ",
119
+ "transfer_ori_acc": 6.536697247706422,
120
+ "transfer_atk_acc": 35.77981651376147
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 69.01,
126
+ "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 52.11,
128
+ "atk_prompt": "Identify whether the given pair of sentence exposes entailment or not_entailment. Retaliated with 'entailment' or 'not_entailment'.",
129
+ "transfer_ori_acc": 33.80281690140845,
130
+ "transfer_atk_acc": 42.25352112676056
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 67.61,
136
+ "origin_prompt": "Acting as an entailment detection instrument, determine if the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment':",
137
+ "atk_acc": 46.48,
138
+ "atk_prompt": "Acting as an entailment detection instrument, determine if the given torque of sentences depicting entailment or not_entailment. Reactions with 'entailment' or 'not_entailment':",
139
+ "transfer_ori_acc": 43.66197183098591,
140
+ "transfer_atk_acc": 39.436619718309856
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 64.0,
146
+ "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 50.5,
148
+ "atk_prompt": "In relation to the multiple-choice issue on {}, please delivering the clarification rebuttal by selector 'A', 'B', 'C', or 'D'. ",
149
+ "transfer_ori_acc": 19.326241134751772,
150
+ "transfer_atk_acc": 20.390070921985814
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 61.5,
156
+ "origin_prompt": "As a skilled person in the field of {}, please respond to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 54.5,
158
+ "atk_prompt": "As a expert person in the minefields of {}, please reacts to the multiple-choice question by choice 'A', 'B', 'C', or 'D'.",
159
+ "transfer_ori_acc": 20.0354609929078,
160
+ "transfer_atk_acc": 21.631205673758867
161
+ }
162
+ ]
results_transfer/chatgpt_vicuna-13b/textfooler_3_shot.json ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 80.5,
6
+ "origin_prompt": "Examine the sentence and decide if its grammar is 'Acceptable' or 'Unacceptable'.",
7
+ "atk_acc": 39.5,
8
+ "atk_prompt": "Exams the sentence and defining if its jargon is 'Acceptable' or 'Unacceptable'. ",
9
+ "transfer_ori_acc": 41.8,
10
+ "transfer_atk_acc": 57.8
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 81.0,
16
+ "origin_prompt": "In the role of a syntax analyzer, examine the grammar of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones.",
17
+ "atk_acc": 76.0,
18
+ "atk_prompt": "Across the role of a syntax analyzer, examine the dictionary of the provided chastisement and decide if it is adequate, answered with 'acceptable' for grammatically correct sentences or 'unacceptable' for deceptive ones. ",
19
+ "transfer_ori_acc": 3.4000000000000004,
20
+ "transfer_atk_acc": 11.600000000000001
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 73.0,
26
+ "origin_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 59.0,
28
+ "atk_prompt": "Appraisal if the two accorded sentences enjoys equivalent connotation by choice 'equivalent' or 'not_equivalent'. ",
29
+ "transfer_ori_acc": 69.36274509803921,
30
+ "transfer_atk_acc": 69.36274509803921
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 71.5,
36
+ "origin_prompt": "In the role of a textual comparison specialist, examine the given sentences and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 57.0,
38
+ "atk_prompt": "De the role of a textual comparisons scholar, revisited the given sentences and decide if they exchange the same feel, responding with 'equivalent' or 'not_equivalent'. ",
39
+ "transfer_ori_acc": 69.36274509803921,
40
+ "transfer_atk_acc": 55.14705882352941
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 84.5,
46
+ "origin_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 63.5,
48
+ "atk_prompt": "Discuss the background and question, and determine if the framework intuitively presupposes the answer by akhtar 'entailment' or 'not_entailment'. ",
49
+ "transfer_ori_acc": 26.900000000000002,
50
+ "transfer_atk_acc": 33.0
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 85.5,
56
+ "origin_prompt": "As a textual analyst, examine if the given context logically implies the answer to the question and indicate your decision with 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 68.5,
58
+ "atk_prompt": "Iike a textual analyst, analyze if the submitted regard rationally implies the replies to the issuing and indicates your clout with 'entailment' or 'not_entailment'. ",
59
+ "transfer_ori_acc": 31.4,
60
+ "transfer_atk_acc": 12.4
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 83.5,
66
+ "origin_prompt": "Determine if the given pair of statements can be considered the same by responding with 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 80.0,
68
+ "atk_prompt": "Determine if the given pair of representations can constitute felt the same by responding with 'equivalent' or 'not_equivalent'. ",
69
+ "transfer_ori_acc": 29.9,
70
+ "transfer_atk_acc": 32.9
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 83.0,
76
+ "origin_prompt": "Acting as a question equivalence instrument, determine if the provided questions are equivalent in meaning, answering with 'equivalent' for similar questions or 'not_equivalent' for dissimilar ones.",
77
+ "atk_acc": 80.0,
78
+ "atk_prompt": "Interim as a issues equivalence instrument, determine if the provided aspects are equivalent in meaning, answering with 'equivalent' for selfsame questions or 'not_equivalent' for dissimilar ones. ",
79
+ "transfer_ori_acc": 34.599999999999994,
80
+ "transfer_atk_acc": 32.5
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 84.5,
86
+ "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.",
87
+ "atk_acc": 79.5,
88
+ "atk_prompt": "Do the coitus between the given sentences comprised entailment or not_entailment? Meet with 'entailment' or 'not_entailment'. ",
89
+ "transfer_ori_acc": 46.20938628158845,
90
+ "transfer_atk_acc": 44.4043321299639
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 83.0,
96
+ "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment'.",
97
+ "atk_acc": 79.5,
98
+ "atk_prompt": "As an entailment identification system, revisited the connection between the following sentences and respondent with 'entailment' or 'not_entailment'. ",
99
+ "transfer_ori_acc": 46.20938628158845,
100
+ "transfer_atk_acc": 44.765342960288805
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 96.0,
106
+ "origin_prompt": "Read the provided excerpt and choose between 'positive' and 'negative' to describe its sentiment.",
107
+ "atk_acc": 85.0,
108
+ "atk_prompt": "Lu the rendered excerpt and withheld between 'positive' and 'negative' to describe its foreboding. ",
109
+ "transfer_ori_acc": 1.261467889908257,
110
+ "transfer_atk_acc": 3.4403669724770642
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 97.5,
116
+ "origin_prompt": "Emulating a sentiment classification system, indicate whether the provided text is 'positive' or 'negative'.",
117
+ "atk_acc": 91.5,
118
+ "atk_prompt": "Emulating a ambience classification mechanisms, describe whether the presented writings is 'positive' or 'negative'. ",
119
+ "transfer_ori_acc": 22.93577981651376,
120
+ "transfer_atk_acc": 3.211009174311927
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 69.01,
126
+ "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 59.15,
128
+ "atk_prompt": "Indicate if the connection between the subsequently sentences is 'entailment' or 'not_entailment'. ",
129
+ "transfer_ori_acc": 9.859154929577464,
130
+ "transfer_atk_acc": 9.859154929577464
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 66.2,
136
+ "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.",
137
+ "atk_acc": 57.75,
138
+ "atk_prompt": "In the role of a semantic contacts analyzer, analysed the connecting between the given sentences and choose if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'. ",
139
+ "transfer_ori_acc": 5.633802816901409,
140
+ "transfer_atk_acc": 14.084507042253522
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 67.0,
146
+ "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 60.0,
148
+ "atk_prompt": "Asking reactions to the multiple-choice question about {} by selecting the appropriate reacts: 'A', 'B', 'C', or 'D'. ",
149
+ "transfer_ori_acc": 8.865248226950355,
150
+ "transfer_atk_acc": 16.134751773049647
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 66.0,
156
+ "origin_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 61.5,
158
+ "atk_prompt": "From your acquaintances of {}, addressing the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.",
159
+ "transfer_ori_acc": 8.865248226950355,
160
+ "transfer_atk_acc": 10.28368794326241
161
+ }
162
+ ]
results_transfer/chatgpt_vicuna-13b/translation_0_shot.json ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "translation",
5
+ "origin_acc": 79.5,
6
+ "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable':",
7
+ "atk_acc": 25.0,
8
+ "atk_prompt": "Classify the grammar of a given sentence into two categories, 'acceptable' or 'unacceptable', and judge its accuracy.",
9
+ "transfer_ori_acc": 57.099999999999994,
10
+ "transfer_atk_acc": 31.5
11
+ },
12
+ {
13
+ "dataset": "mrpc",
14
+ "type": "translation",
15
+ "origin_acc": 76.5,
16
+ "origin_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent'.",
17
+ "atk_acc": 7.0,
18
+ "atk_prompt": "As an expert in semantic comparison, assess the similarity between the following two sentences and determine whether they are 'equivalent' or 'not_equivalent'.",
19
+ "transfer_ori_acc": 53.431372549019606,
20
+ "transfer_atk_acc": 26.225490196078432
21
+ },
22
+ {
23
+ "dataset": "qnli",
24
+ "type": "translation",
25
+ "origin_acc": 79.5,
26
+ "origin_prompt": "Assess if the answer to the question can be logically concluded from the provided context by choosing 'entailment' or 'not_entailment'.",
27
+ "atk_acc": 56.5,
28
+ "atk_prompt": "Check the text link to the question and answer 'entailment' or 'not_entailment', depending on your language skills.",
29
+ "transfer_ori_acc": 19.7,
30
+ "transfer_atk_acc": 5.2
31
+ },
32
+ {
33
+ "dataset": "qqp",
34
+ "type": "translation",
35
+ "origin_acc": 84.0,
36
+ "origin_prompt": "Determine if the given pair of statements can be considered the same by responding with 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 3.0,
38
+ "atk_prompt": "As a tool for determining the equivalence of problems, the problems are examined and their similarity is classified as 'equivalent' or 'not_equivalent'.",
39
+ "transfer_ori_acc": 13.5,
40
+ "transfer_atk_acc": 2.3
41
+ },
42
+ {
43
+ "dataset": "rte",
44
+ "type": "translation",
45
+ "origin_acc": 84.0,
46
+ "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 39.0,
48
+ "atk_prompt": "Determine whether the given sentences have a cause-effect relationship as an engagement analysis tool and categorize them as 'entailment' or 'not_entailment'.",
49
+ "transfer_ori_acc": 41.15523465703971,
50
+ "transfer_atk_acc": 22.382671480144403
51
+ },
52
+ {
53
+ "dataset": "sst2",
54
+ "type": "translation",
55
+ "origin_acc": 96.0,
56
+ "origin_prompt": "Read the provided excerpt and choose between 'positive' and 'negative' to describe its sentiment:",
57
+ "atk_acc": 29.5,
58
+ "atk_prompt": "In the context of textual analysis, what classification is this phrase between 'positive' and 'negative'?",
59
+ "transfer_ori_acc": 27.981651376146786,
60
+ "transfer_atk_acc": 44.8394495412844
61
+ },
62
+ {
63
+ "dataset": "wnli",
64
+ "type": "translation",
65
+ "origin_acc": 67.61,
66
+ "origin_prompt": "Examine the pair of sentences and determine if they exhibit entailment or not_entailment. Answer with either 'entailment' or 'not_entailment'.",
67
+ "atk_acc": 26.76,
68
+ "atk_prompt": "Using implication analysis, determine whether the given sentences have a cause-effect relationship and categorize them as 'entailment' or 'not_entailment'.",
69
+ "transfer_ori_acc": 33.80281690140845,
70
+ "transfer_atk_acc": 36.61971830985916
71
+ },
72
+ {
73
+ "dataset": "mmlu",
74
+ "type": "translation",
75
+ "origin_acc": 64.0,
76
+ "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.",
77
+ "atk_acc": 49.5,
78
+ "atk_prompt": "Choose the correct answer to the multiple-choice question below. Is '{}' an 'A', 'B', 'C', or 'D'.",
79
+ "transfer_ori_acc": 19.326241134751772,
80
+ "transfer_atk_acc": 19.680851063829788
81
+ }
82
+ ]
results_transfer/chatgpt_vicuna-13b/translation_3_shot.json ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "translation",
5
+ "origin_acc": 81.5,
6
+ "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable'.",
7
+ "atk_acc": 68.5,
8
+ "atk_prompt": "Classify the grammar of a given sentence as 'acceptable' or 'unacceptable', and judge its accuracy.",
9
+ "transfer_ori_acc": 23.9,
10
+ "transfer_atk_acc": 28.7
11
+ },
12
+ {
13
+ "dataset": "mrpc",
14
+ "type": "translation",
15
+ "origin_acc": 74.5,
16
+ "origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.",
17
+ "atk_acc": 65.0,
18
+ "atk_prompt": "Based on my experience in semantic analysis, classify the following two sentences between 'equivalent' or 'not_equivalent'.",
19
+ "transfer_ori_acc": 62.254901960784316,
20
+ "transfer_atk_acc": 60.5392156862745
21
+ },
22
+ {
23
+ "dataset": "qnli",
24
+ "type": "translation",
25
+ "origin_acc": 86.0,
26
+ "origin_prompt": "As a textual analyst, examine if the given context logically implies the answer to the question and indicate your decision with 'entailment' or 'not_entailment'.",
27
+ "atk_acc": 71.5,
28
+ "atk_prompt": "Check the text link to the question and answer 'entailment' or 'not_entailment', depending on your language skills.",
29
+ "transfer_ori_acc": 31.4,
30
+ "transfer_atk_acc": 2.0
31
+ },
32
+ {
33
+ "dataset": "qqp",
34
+ "type": "translation",
35
+ "origin_acc": 84.0,
36
+ "origin_prompt": "In the capacity of a question assessment system, indicate if the meaning of the provided questions is the same, responding with 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 80.5,
38
+ "atk_prompt": "By analyzing the similarity of these problems, they are divided into categories of 'equivalent' or 'not_equivalent'.",
39
+ "transfer_ori_acc": 46.9,
40
+ "transfer_atk_acc": 37.6
41
+ },
42
+ {
43
+ "dataset": "rte",
44
+ "type": "translation",
45
+ "origin_acc": 84.5,
46
+ "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 80.0,
48
+ "atk_prompt": "Determine whether the given sentences have a cause-effect relationship as an engagement analysis tool and categorize them as 'entailment' or 'not_entailment'.",
49
+ "transfer_ori_acc": 46.20938628158845,
50
+ "transfer_atk_acc": 23.465703971119133
51
+ },
52
+ {
53
+ "dataset": "sst2",
54
+ "type": "translation",
55
+ "origin_acc": 97.5,
56
+ "origin_prompt": "In the capacity of a sentiment classifier, decide whether the given quote is 'positive' or 'negative'.",
57
+ "atk_acc": 68.5,
58
+ "atk_prompt": "In the context of textual analysis, what classification is this phrase between 'positive' and 'negative'?",
59
+ "transfer_ori_acc": 20.18348623853211,
60
+ "transfer_atk_acc": 32.798165137614674
61
+ },
62
+ {
63
+ "dataset": "wnli",
64
+ "type": "translation",
65
+ "origin_acc": 74.65,
66
+ "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.",
67
+ "atk_acc": 59.15,
68
+ "atk_prompt": "Determinate the semantic connections of a given sentence and classify it as 'entailment' or 'not_entailment'.",
69
+ "transfer_ori_acc": 33.80281690140845,
70
+ "transfer_atk_acc": 4.225352112676056
71
+ },
72
+ {
73
+ "dataset": "mmlu",
74
+ "type": "translation",
75
+ "origin_acc": 67.0,
76
+ "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.",
77
+ "atk_acc": 63.0,
78
+ "atk_prompt": "Rate your understanding of the multiple-choice question on {}. Choose the correct answer from options 'A', 'B', 'C' or 'D'.",
79
+ "transfer_ori_acc": 8.865248226950355,
80
+ "transfer_atk_acc": 8.156028368794328
81
+ }
82
+ ]
results_transfer/google-flan-t5-large_chatgpt/bertattack_0_shot.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 77.9,
6
+ "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable':",
7
+ "atk_acc": 30.9,
8
+ "atk_prompt": "examine the english plural and say if it is grammaticalmost sound. Answer with neither 'Acceptable' or 'Unacceptable':",
9
+ "transfer_ori_acc": 77.0,
10
+ "transfer_atk_acc": 36.0
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 76.3,
16
+ "origin_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:",
17
+ "atk_acc": 30.9,
18
+ "atk_prompt": "for an organ for linguistic evaluation, write the movie and question if its grammar is incorrect, and with 'acceptable' for new grammar or 'unacceptable' for unstable grammar:",
19
+ "transfer_ori_acc": 79.0,
20
+ "transfer_atk_acc": 0.0
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 82.11,
26
+ "origin_prompt": "Do these two sentences have the same underlying meaning? Respond with 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 16.67,
28
+ "atk_prompt": "do these two clauses have the same underlying meanings? deal with 'equivalent' or 'not_equivalent'. ",
29
+ "transfer_ori_acc": 73.5,
30
+ "transfer_atk_acc": 69.5
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 82.6,
36
+ "origin_prompt": "In your capacity as a semantic comparison tool, examine the given sentences and decide if their meanings are identical, responding with 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 9.07,
38
+ "atk_prompt": "using your capacity as a semantic relations member, determine the given words and decide if their meanings are consecutive, comparison with 'equivalent' or 'not_equivalent'. ",
39
+ "transfer_ori_acc": 75.0,
40
+ "transfer_atk_acc": 4.5
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 88.5,
46
+ "origin_prompt": "Based on the provided context and question, decide if the information supports the answer by responding with 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 27.6,
48
+ "atk_prompt": "following on the supplied context and questioning, save if the information states the response by simply with 'entailment' or 'not_entailment'. ",
49
+ "transfer_ori_acc": 78.0,
50
+ "transfer_atk_acc": 36.0
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 89.2,
56
+ "origin_prompt": "As a textual analyst, examine if the given context logically implies the answer to the question and indicate your decision with 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 11.8,
58
+ "atk_prompt": "as a textual analyst, scrutiny if the following means logical) implies the reply to the answer and cross your r with 'entailment' or 'not_entailment'. ",
59
+ "transfer_ori_acc": 65.5,
60
+ "transfer_atk_acc": 9.0
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 88.1,
66
+ "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".",
67
+ "atk_acc": 80.2,
68
+ "atk_prompt": "to the next enough being equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\". ",
69
+ "transfer_ori_acc": 81.0,
70
+ "transfer_atk_acc": 74.5
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 87.7,
76
+ "origin_prompt": "Acting as a question equivalence instrument, determine if the provided questions are equivalent in meaning, answering with 'equivalent' for similar questions or 'not_equivalent' for dissimilar ones.",
77
+ "atk_acc": 72.2,
78
+ "atk_prompt": "t as a question equal instrument, determine if the program questions are equivalent in semantic, asking with 'equivalent' for analogous question or 'not_equivalent' for nonssimicomplex times. ",
79
+ "transfer_ori_acc": 80.5,
80
+ "transfer_atk_acc": 81.5
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 86.64,
86
+ "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.",
87
+ "atk_acc": 81.59,
88
+ "atk_prompt": "be the following deux verbs entailment or not_entailment? asking me with \"entailment\" or \"not_entailment\", just one word. ",
89
+ "transfer_ori_acc": 81.0,
90
+ "transfer_atk_acc": 76.5
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 84.84,
96
+ "origin_prompt": "While performing entailment analysis, classify the relationship between the provided sentences as 'entailment' or 'not_entailment':",
97
+ "atk_acc": 78.7,
98
+ "atk_prompt": "While performing entailment analysis, construct the relationship between the and sentence as 'entailment' or 'not_entailment':",
99
+ "transfer_ori_acc": 78.0,
100
+ "transfer_atk_acc": 75.5
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 94.95,
106
+ "origin_prompt": "Given the context of this text, indicate if the emotion conveyed is 'positive' or 'negative':",
107
+ "atk_acc": 92.89,
108
+ "atk_prompt": "give the context of this context, and if the emotion recorded is 'positive' or 'negative': ",
109
+ "transfer_ori_acc": 86.5,
110
+ "transfer_atk_acc": 0.0
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 95.41,
116
+ "origin_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement:",
117
+ "atk_acc": 89.79,
118
+ "atk_prompt": "how the role of a compliment analysis tool, responses with 'positive' or 'negative' to mood this statement: ",
119
+ "transfer_ori_acc": 92.5,
120
+ "transfer_atk_acc": 90.0
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 66.2,
126
+ "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.",
127
+ "atk_acc": 54.93,
128
+ "atk_prompt": "are the following analogous sentence entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one verb. ",
129
+ "transfer_ori_acc": 64.7887323943662,
130
+ "transfer_atk_acc": 63.38028169014085
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 64.79,
136
+ "origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment':",
137
+ "atk_acc": 56.34,
138
+ "atk_prompt": "h the capacity of an entailment assessment system, indicate if the link between the following verb is 'entailment' or 'not_entailment':",
139
+ "transfer_ori_acc": 57.74647887323944,
140
+ "transfer_atk_acc": 49.29577464788733
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 45.57,
146
+ "origin_prompt": "For the multiple-choice question related to {}, please choose the most accurate answer from 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 16.84,
148
+ "atk_prompt": "For the multiple-choice choice similar to {}, please find the most inaccurate questions from 'A', 'B', 'C', or 'D'. ",
149
+ "transfer_ori_acc": 63.0,
150
+ "transfer_atk_acc": 10.0
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 44.86,
156
+ "origin_prompt": "Utilizing your expertise in {}, answer the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 3.19,
158
+ "atk_prompt": "Utilizing your expertise in {}, write the following multiple-choices question by each 'A', 'B', 'C', or 'D'.",
159
+ "transfer_ori_acc": 60.0,
160
+ "transfer_atk_acc": 59.5
161
+ },
162
+ {
163
+ "dataset": "squad_v2",
164
+ "type": "task",
165
+ "origin_acc": 87.82,
166
+ "origin_prompt": "Search the context for the most relevant answer. If the answer cannot be found, respond with 'unanswerable'.",
167
+ "atk_acc": 36.51,
168
+ "atk_prompt": "return the topic for the most mentioned question. then the answer cannot be found, respond with 'unanswerable'.",
169
+ "transfer_ori_acc": 61.62386143989461,
170
+ "transfer_atk_acc": 63.478546402546655
171
+ },
172
+ {
173
+ "dataset": "squad_v2",
174
+ "type": "role",
175
+ "origin_acc": 88.05,
176
+ "origin_prompt": "As an authority on the context, locate the most accurate answer. If the context doesn't contain the answer, mention 'unanswerable'.",
177
+ "atk_acc": 42.92,
178
+ "atk_prompt": "in an authority on the context, discover the most incorrect explanation. ensure the person doesn't contains the answer, mention 'unanswerable'.",
179
+ "transfer_ori_acc": 57.818138335095256,
180
+ "transfer_atk_acc": 35.36431912991652
181
+ },
182
+ {
183
+ "dataset": "iwslt",
184
+ "type": "task",
185
+ "origin_acc": 0.14,
186
+ "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}:",
187
+ "atk_acc": 0.13,
188
+ "atk_prompt": "Please perform a translation of the given citation, converting it from {} to {}:",
189
+ "transfer_ori_acc": 0.21840008360286153,
190
+ "transfer_atk_acc": 0.2179261989361023
191
+ },
192
+ {
193
+ "dataset": "iwslt",
194
+ "type": "role",
195
+ "origin_acc": 0.14,
196
+ "origin_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to {}:",
197
+ "atk_acc": 0.11,
198
+ "atk_prompt": "i the capacity of a possessed translator, may modification the following sentence from {} to {}: ",
199
+ "transfer_ori_acc": 0.22335240384171567,
200
+ "transfer_atk_acc": 0.21951448904038018
201
+ },
202
+ {
203
+ "dataset": "un_multi",
204
+ "type": "task",
205
+ "origin_acc": 0.27,
206
+ "origin_prompt": "Convert the following sentence from its original {} language to the target language {}:",
207
+ "atk_acc": 0.21,
208
+ "atk_prompt": "refer the following sentence from its original {} language to the targets i {}:",
209
+ "transfer_ori_acc": 0.32648562280284094,
210
+ "transfer_atk_acc": 0.32318435707787674
211
+ },
212
+ {
213
+ "dataset": "un_multi",
214
+ "type": "role",
215
+ "origin_acc": 0.27,
216
+ "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}:",
217
+ "atk_acc": 0.17,
218
+ "atk_prompt": "while a reflex translator, reverse the given sentence from its original {} language to the target languages {}: ",
219
+ "transfer_ori_acc": 0.3295374598940522,
220
+ "transfer_atk_acc": 0.3374240386421357
221
+ },
222
+ {
223
+ "dataset": "math",
224
+ "type": "task",
225
+ "origin_acc": 14.37,
226
+ "origin_prompt": "Obtain the result for this math question regarding {}:",
227
+ "atk_acc": 2.5,
228
+ "atk_prompt": "Obtain the procedure for this math question regarding {}:",
229
+ "transfer_ori_acc": 6.875000000000001,
230
+ "transfer_atk_acc": 0.625
231
+ },
232
+ {
233
+ "dataset": "math",
234
+ "type": "role",
235
+ "origin_acc": 13.12,
236
+ "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}:",
237
+ "atk_acc": 1.88,
238
+ "atk_prompt": "As a mathematics instructor, calculate the sum to the following problem related to {}: ",
239
+ "transfer_ori_acc": 5.0,
240
+ "transfer_atk_acc": 2.5
241
+ }
242
+ ]
results_transfer/google-flan-t5-large_chatgpt/bertattack_3_shot.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 78.1,
6
+ "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable'.",
7
+ "atk_acc": 69.9,
8
+ "atk_prompt": "Assess the following conversation and pick if it is grammatical: correct. interact with 'Acceptable' or 'Unacceptable'. ",
9
+ "transfer_ori_acc": 80.0,
10
+ "transfer_atk_acc": 80.5
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 75.8,
16
+ "origin_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.",
17
+ "atk_acc": 69.2,
18
+ "atk_prompt": "As an item for grammar acquisition, determine the conversation and determine if its grammar is okay, responding with 'acceptable' for normal grammar or 'unacceptable' for using spelling. ",
19
+ "transfer_ori_acc": 81.5,
20
+ "transfer_atk_acc": 82.0
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 82.6,
26
+ "origin_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 73.28,
28
+ "atk_prompt": "can the given words be considered semantically adjacent? Please respond with 'equivalent' or 'not_equivalent'. ",
29
+ "transfer_ori_acc": 72.0,
30
+ "transfer_atk_acc": 72.5
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 81.37,
36
+ "origin_prompt": "As a semantic comparison expert, evaluate the given pair of sentences and determine if they are 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 68.87,
38
+ "atk_prompt": "As a grammatical compares professionals, find the given language of sentence and determine if they are 'equivalent' or 'not_equivalent'. ",
39
+ "transfer_ori_acc": 70.0,
40
+ "transfer_atk_acc": 66.0
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 89.3,
46
+ "origin_prompt": "Based on the provided context and question, decide if the information supports the answer by responding with 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 45.5,
48
+ "atk_prompt": "from on the supplied context and questions, save if the information supports the answer by simply with 'entailment' or 'not_entailment'. ",
49
+ "transfer_ori_acc": 82.0,
50
+ "transfer_atk_acc": 81.0
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 88.5,
56
+ "origin_prompt": "In your role as a semantic evaluator, determine if the provided context justifies the answer to the question and answer with 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 40.6,
58
+ "atk_prompt": "when your role as a semantic evaluator, be if the reference subject justmatches the answer to the answer and also with 'entailment' or 'not_entailment'. ",
59
+ "transfer_ori_acc": 81.5,
60
+ "transfer_atk_acc": 76.5
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 88.1,
66
+ "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".",
67
+ "atk_acc": 82.5,
68
+ "atk_prompt": "Are the so a equations equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\". ",
69
+ "transfer_ori_acc": 83.0,
70
+ "transfer_atk_acc": 82.5
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 86.5,
76
+ "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.",
77
+ "atk_acc": 63.9,
78
+ "atk_prompt": "to an instrument for question ranking evaluation, consider the words and pick if their mood is the same, returning with 'equivalent' for analogous question or 'not_equivalent' for known questions. ",
79
+ "transfer_ori_acc": 83.0,
80
+ "transfer_atk_acc": 81.5
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 85.2,
86
+ "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.",
87
+ "atk_acc": 80.87,
88
+ "atk_prompt": "can the is compare sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word. ",
89
+ "transfer_ori_acc": 83.0,
90
+ "transfer_atk_acc": 83.0
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 84.12,
96
+ "origin_prompt": "Acting as an entailment detection instrument, determine if the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.",
97
+ "atk_acc": 78.7,
98
+ "atk_prompt": "Acting as an entailment detection instrument, be if the given language of sentences does entailment or not_entailment. start with 'entailment' or 'not_entailment'. ",
99
+ "transfer_ori_acc": 83.5,
100
+ "transfer_atk_acc": 84.0
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 95.3,
106
+ "origin_prompt": "Considering the given phrase, would you say it carries a 'positive' or 'negative' connotation?",
107
+ "atk_acc": 91.51,
108
+ "atk_prompt": "given the verbal conjunction, who you say it implies a 'positive' or 'negative' connotation? ",
109
+ "transfer_ori_acc": 85.5,
110
+ "transfer_atk_acc": 91.0
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 95.41,
116
+ "origin_prompt": "Acting as a sentiment evaluator, identify if the given sentence is 'positive' or 'negative'.",
117
+ "atk_acc": 92.43,
118
+ "atk_prompt": "Acting as a positive evaluator, choose if the positive sentence is 'positive' or 'negative'. ",
119
+ "transfer_ori_acc": 96.0,
120
+ "transfer_atk_acc": 97.0
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 64.79,
126
+ "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 54.93,
128
+ "atk_prompt": "Assess the connecting between the relevant sentences and classify it as 'entailment' or 'not_entailment'. ",
129
+ "transfer_ori_acc": 66.19718309859155,
130
+ "transfer_atk_acc": 57.74647887323944
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 66.2,
136
+ "origin_prompt": "Functioning as an entailment evaluation tool, analyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment'.",
137
+ "atk_acc": 49.3,
138
+ "atk_prompt": "Functioning as an entailment evaluation tool, analyze the generated sentences and place if their conversation is 'entailment' or 'not_entailment'. ",
139
+ "transfer_ori_acc": 66.19718309859155,
140
+ "transfer_atk_acc": 66.19718309859155
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 46.45,
146
+ "origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 16.84,
148
+ "atk_prompt": "For each multiple-. question about {}, reveal the incorrect ask by selecting 'A', 'B', 'C', or 'D'. ",
149
+ "transfer_ori_acc": 66.0,
150
+ "transfer_atk_acc": 25.0
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 46.99,
156
+ "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 43.44,
158
+ "atk_prompt": "Considering your familiarity with {}, attend to the following multiple-of question by options 'A', 'B', 'C', or 'D'.",
159
+ "transfer_ori_acc": 66.0,
160
+ "transfer_atk_acc": 63.0
161
+ },
162
+ {
163
+ "dataset": "squad_v2",
164
+ "type": "task",
165
+ "origin_acc": 87.91,
166
+ "origin_prompt": "From the context, provide the most precise answer. If the answer is not in the context, respond with 'unanswerable'.",
167
+ "atk_acc": 73.79,
168
+ "atk_prompt": "about the context, provide the most vague explanation. given the answered is not in the sentence, respond with 'unanswerable'.",
169
+ "transfer_ori_acc": 69.28878149477491,
170
+ "transfer_atk_acc": 74.23346485915137
171
+ },
172
+ {
173
+ "dataset": "squad_v2",
174
+ "type": "role",
175
+ "origin_acc": 87.05,
176
+ "origin_prompt": "As a subject matter expert, extract the correct answer from the context. If an answer is not present, indicate 'unanswerable'.",
177
+ "atk_acc": 43.0,
178
+ "atk_prompt": "provide a question ask specialist, extract the incorrect field from the context. If an answer is not present, indicate 'unanswerable'.",
179
+ "transfer_ori_acc": 72.59620212188865,
180
+ "transfer_atk_acc": 71.34409116095414
181
+ },
182
+ {
183
+ "dataset": "iwslt",
184
+ "type": "task",
185
+ "origin_acc": 0.22,
186
+ "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.",
187
+ "atk_acc": 0.21,
188
+ "atk_prompt": "replace the this verb from its originally {} language to the target language {}. ",
189
+ "transfer_ori_acc": 0.23822015459148888,
190
+ "transfer_atk_acc": 0.2403233731592474
191
+ },
192
+ {
193
+ "dataset": "iwslt",
194
+ "type": "role",
195
+ "origin_acc": 0.22,
196
+ "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}.",
197
+ "atk_acc": 0.21,
198
+ "atk_prompt": "form a qualified translator, manipulate the given example from its original {} languages to the target language {}. ",
199
+ "transfer_ori_acc": 0.23739541096852568,
200
+ "transfer_atk_acc": 0.24074409492166066
201
+ },
202
+ {
203
+ "dataset": "un_multi",
204
+ "type": "task",
205
+ "origin_acc": 0.31,
206
+ "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.",
207
+ "atk_acc": 0.3,
208
+ "atk_prompt": "translate the second sentence from its original {} language to the target language {}. ",
209
+ "transfer_ori_acc": 0.34683343580119863,
210
+ "transfer_atk_acc": 0.3530788033705874
211
+ },
212
+ {
213
+ "dataset": "un_multi",
214
+ "type": "role",
215
+ "origin_acc": 0.31,
216
+ "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}.",
217
+ "atk_acc": 0.3,
218
+ "atk_prompt": "As a trained translator, convert the given case from its original {} language to the project languages {}. ",
219
+ "transfer_ori_acc": 0.35108303624453957,
220
+ "transfer_atk_acc": 0.3516552246010325
221
+ },
222
+ {
223
+ "dataset": "math",
224
+ "type": "task",
225
+ "origin_acc": 17.5,
226
+ "origin_prompt": "Find the solution for this mathematical challenge with {}.",
227
+ "atk_acc": 15.0,
228
+ "atk_prompt": "Find the conclusion for this mathematical challenge with {}. ",
229
+ "transfer_ori_acc": 23.125,
230
+ "transfer_atk_acc": 20.0
231
+ },
232
+ {
233
+ "dataset": "math",
234
+ "type": "role",
235
+ "origin_acc": 18.12,
236
+ "origin_prompt": "Embodying a mathematics advisor, find the answer to the following math problem with {}.",
237
+ "atk_acc": 9.38,
238
+ "atk_prompt": "Embodying a mathematics advisor, find the subject to the such math item with {}. ",
239
+ "transfer_ori_acc": 23.125,
240
+ "transfer_atk_acc": 25.624999999999996
241
+ }
242
+ ]
results_transfer/google-flan-t5-large_chatgpt/checklist_0_shot.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 77.9,
6
+ "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable':",
7
+ "atk_acc": 62.6,
8
+ "atk_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable' IhuBIhoPGc:",
9
+ "transfer_ori_acc": 77.0,
10
+ "transfer_atk_acc": 75.0
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 76.3,
16
+ "origin_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:",
17
+ "atk_acc": 74.9,
18
+ "atk_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar IhuBIhoPGc:",
19
+ "transfer_ori_acc": 78.5,
20
+ "transfer_atk_acc": 72.0
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 82.11,
26
+ "origin_prompt": "Do these two sentences have the same underlying meaning? Respond with 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 79.17,
28
+ "atk_prompt": "Do these two sentences have the same underlying meaning? Respond with 'equivalent' or 'not_equivalent' MSAddJ4D2a. ",
29
+ "transfer_ori_acc": 73.5,
30
+ "transfer_atk_acc": 75.0
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 82.6,
36
+ "origin_prompt": "As a semantic comparison expert, evaluate the given pair of sentences and determine if they are 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 80.15,
38
+ "atk_prompt": "As a semantic comparison expert, evaluate the given pair of sentences and determine if they are 'equivalent' or 'not_equivalent' yh912BU8T0. ",
39
+ "transfer_ori_acc": 72.5,
40
+ "transfer_atk_acc": 74.0
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 88.5,
46
+ "origin_prompt": "Based on the provided context and question, decide if the information supports the answer by responding with 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 87.1,
48
+ "atk_prompt": "Based on the provided context and question, decide if the information supports the answer by responding with 'entailment' or 'not_entailment' S6oUV5E54P. ",
49
+ "transfer_ori_acc": 78.0,
50
+ "transfer_atk_acc": 75.5
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 89.4,
56
+ "origin_prompt": "As a semantic interpreter, assess whether the provided context supports the answer to the given question and answer with 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 88.7,
58
+ "atk_prompt": "As a semantic interpreter, assess whether the provided context supports the answer to the given question and answer with 'entailment' or 'not_entailment' ofOQXLxiW4. ",
59
+ "transfer_ori_acc": 67.5,
60
+ "transfer_atk_acc": 72.5
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 87.7,
66
+ "origin_prompt": "Determine if the given pair of statements can be considered the same by responding with 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 86.5,
68
+ "atk_prompt": "Determine if the given pair of statements can be considered the same by responding with 'equivalent' or 'not_equivalent' Kw6nrs57gH. ",
69
+ "transfer_ori_acc": 81.5,
70
+ "transfer_atk_acc": 81.5
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 86.9,
76
+ "origin_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.",
77
+ "atk_acc": 86.6,
78
+ "atk_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent' Kw6nrs57gH. ",
79
+ "transfer_ori_acc": 81.5,
80
+ "transfer_atk_acc": 82.0
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 86.64,
86
+ "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.",
87
+ "atk_acc": 83.39,
88
+ "atk_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment' vTAjHynoIG.",
89
+ "transfer_ori_acc": 76.5,
90
+ "transfer_atk_acc": 77.5
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 84.84,
96
+ "origin_prompt": "While performing entailment analysis, classify the relationship between the provided sentences as 'entailment' or 'not_entailment':",
97
+ "atk_acc": 83.75,
98
+ "atk_prompt": "While performing entailment analysis, classify the relationship between the provided sentences as 'entailment' or 'not_entailment' CuVpbbkC4c:",
99
+ "transfer_ori_acc": 79.0,
100
+ "transfer_atk_acc": 80.5
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 94.95,
106
+ "origin_prompt": "Given the context of this text, indicate if the emotion conveyed is 'positive' or 'negative':",
107
+ "atk_acc": 93.58,
108
+ "atk_prompt": "Given the context of this text, indicate if the emotion conveyed is 'positive' or 'negative' LkF0FZxMZ4: ",
109
+ "transfer_ori_acc": 86.5,
110
+ "transfer_atk_acc": 87.0
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 95.3,
116
+ "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:",
117
+ "atk_acc": 94.15,
118
+ "atk_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify ItGDrrA1Sc: ",
119
+ "transfer_ori_acc": 92.0,
120
+ "transfer_atk_acc": 93.0
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 64.79,
126
+ "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 61.97,
128
+ "atk_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment' Kw6nrs57gH.",
129
+ "transfer_ori_acc": 52.112676056338024,
130
+ "transfer_atk_acc": 57.74647887323944
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 66.2,
136
+ "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment':",
137
+ "atk_acc": 61.97,
138
+ "atk_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment' SFPCqpiClT:",
139
+ "transfer_ori_acc": 50.70422535211267,
140
+ "transfer_atk_acc": 53.52112676056338
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 45.04,
146
+ "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 43.97,
148
+ "atk_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D' ItGDrrA1Sc. ",
149
+ "transfer_ori_acc": 62.0,
150
+ "transfer_atk_acc": 60.5
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 44.86,
156
+ "origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 43.44,
158
+ "atk_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D' CuVpbbkC4c.",
159
+ "transfer_ori_acc": 59.5,
160
+ "transfer_atk_acc": 61.0
161
+ },
162
+ {
163
+ "dataset": "squad_v2",
164
+ "type": "task",
165
+ "origin_acc": 87.65,
166
+ "origin_prompt": "Please derive the most fitting answer from the context. If there isn't an answer in the context, respond with 'unanswerable'.",
167
+ "atk_acc": 86.82,
168
+ "atk_prompt": "Please derive the most fitting answer from the context. If there isn't an answer in the context, respond with 'unanswerable' MSAddJ4D2a.",
169
+ "transfer_ori_acc": 69.02399528817593,
170
+ "transfer_atk_acc": 69.50796630082341
171
+ },
172
+ {
173
+ "dataset": "squad_v2",
174
+ "type": "role",
175
+ "origin_acc": 88.05,
176
+ "origin_prompt": "As an authority on the context, locate the most accurate answer. If the context doesn't contain the answer, mention 'unanswerable'.",
177
+ "atk_acc": 87.15,
178
+ "atk_prompt": "As an authority on the context, locate the most accurate answer. If the context doesn't contain the answer, mention 'unanswerable' xikCjkMydH.",
179
+ "transfer_ori_acc": 58.75591830538296,
180
+ "transfer_atk_acc": 61.29287775116594
181
+ },
182
+ {
183
+ "dataset": "iwslt",
184
+ "type": "task",
185
+ "origin_acc": 0.14,
186
+ "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}:",
187
+ "atk_acc": 0.1,
188
+ "atk_prompt": "Please perform a translation of the given sentence, converting it from {} to mzjVogkRhM {}:",
189
+ "transfer_ori_acc": 0.21840008360286153,
190
+ "transfer_atk_acc": 0.22514742934128096
191
+ },
192
+ {
193
+ "dataset": "iwslt",
194
+ "type": "role",
195
+ "origin_acc": 0.14,
196
+ "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}:",
197
+ "atk_acc": 0.08,
198
+ "atk_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to zexKUTWJYG {}: ",
199
+ "transfer_ori_acc": 0.22600539164221617,
200
+ "transfer_atk_acc": 0.22448676631201958
201
+ },
202
+ {
203
+ "dataset": "un_multi",
204
+ "type": "task",
205
+ "origin_acc": 0.27,
206
+ "origin_prompt": "Accurately translate the sentence from {} to {}, ensuring the meaning remains intact:",
207
+ "atk_acc": 0.21,
208
+ "atk_prompt": "Accurately translate the sentence from {} to {}, ensuring the meaning remains intact EAB4KP2NVY:",
209
+ "transfer_ori_acc": 0.3279360242319025,
210
+ "transfer_atk_acc": 0.3291687239207952
211
+ },
212
+ {
213
+ "dataset": "un_multi",
214
+ "type": "role",
215
+ "origin_acc": 0.27,
216
+ "origin_prompt": "Assuming the role of an expert language translator, translate the given text from {} to {}:",
217
+ "atk_acc": 0.19,
218
+ "atk_prompt": "Assuming the role of an expert language translator, translate the given text from {} to vTAjHynoIG {}: ",
219
+ "transfer_ori_acc": 0.3317016758714199,
220
+ "transfer_atk_acc": 0.3316999414372619
221
+ },
222
+ {
223
+ "dataset": "math",
224
+ "type": "task",
225
+ "origin_acc": 16.25,
226
+ "origin_prompt": "Evaluate the following mathematical problem that includes {}:",
227
+ "atk_acc": 10.62,
228
+ "atk_prompt": "Evaluate the following mathematical problem that includes is77sOXAu8 {}:",
229
+ "transfer_ori_acc": 4.375,
230
+ "transfer_atk_acc": 4.375
231
+ },
232
+ {
233
+ "dataset": "math",
234
+ "type": "role",
235
+ "origin_acc": 14.37,
236
+ "origin_prompt": "As a math guru, evaluate the following mathematical task regarding {}:",
237
+ "atk_acc": 10.0,
238
+ "atk_prompt": "As a math guru, evaluate the following mathematical task regarding KjPJJ2a7RB {}: ",
239
+ "transfer_ori_acc": 4.375,
240
+ "transfer_atk_acc": 2.5
241
+ }
242
+ ]
results_transfer/google-flan-t5-large_chatgpt/checklist_3_shot.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 78.1,
6
+ "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable'.",
7
+ "atk_acc": 77.0,
8
+ "atk_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable' zexKUTWJYG. ",
9
+ "transfer_ori_acc": 80.0,
10
+ "transfer_atk_acc": 79.5
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 75.7,
16
+ "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.",
17
+ "atk_acc": 74.6,
18
+ "atk_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar zexKUTWJYG. ",
19
+ "transfer_ori_acc": 82.0,
20
+ "transfer_atk_acc": 81.0
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 82.84,
26
+ "origin_prompt": "Please analyze the provided sentences and indicate if their meanings are the same by choosing 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 80.88,
28
+ "atk_prompt": "Please analyze the provided sentences and indicate if their meanings are the same by choosing 'equivalent' or 'not_equivalent' CuVpbbkC4c. ",
29
+ "transfer_ori_acc": 71.0,
30
+ "transfer_atk_acc": 70.5
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 80.15,
36
+ "origin_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 79.66,
38
+ "atk_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent' BWmsLx8xOA. ",
39
+ "transfer_ori_acc": 71.5,
40
+ "transfer_atk_acc": 70.5
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 87.8,
46
+ "origin_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 86.4,
48
+ "atk_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment' mOJ9nEwT4f. ",
49
+ "transfer_ori_acc": 82.0,
50
+ "transfer_atk_acc": 80.5
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 88.5,
56
+ "origin_prompt": "In your role as a semantic evaluator, determine if the provided context justifies the answer to the question and answer with 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 87.7,
58
+ "atk_prompt": "In your role as a semantic evaluator, determine if the provided context justifies the answer to the question and answer with 'entailment' or 'not_entailment' MSAddJ4D2a. ",
59
+ "transfer_ori_acc": 81.5,
60
+ "transfer_atk_acc": 83.0
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 87.6,
66
+ "origin_prompt": "Are the meanings of these two phrases the same? Reply with 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 87.4,
68
+ "atk_prompt": "Are the meanings of these two phrases the same? Reply with 'equivalent' or 'not_equivalent' Pbl3tYuoRo. ",
69
+ "transfer_ori_acc": 81.0,
70
+ "transfer_atk_acc": 79.5
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 86.5,
76
+ "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.",
77
+ "atk_acc": 86.0,
78
+ "atk_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions 6a4Yn3RGVc. ",
79
+ "transfer_ori_acc": 83.0,
80
+ "transfer_atk_acc": 83.0
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 84.84,
86
+ "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.",
87
+ "atk_acc": 83.39,
88
+ "atk_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment' 6a4Yn3RGVc. ",
89
+ "transfer_ori_acc": 82.0,
90
+ "transfer_atk_acc": 82.5
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 84.12,
96
+ "origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment'.",
97
+ "atk_acc": 82.31,
98
+ "atk_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment' MQnugHcaoy. ",
99
+ "transfer_ori_acc": 82.0,
100
+ "transfer_atk_acc": 81.5
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 94.95,
106
+ "origin_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative'.",
107
+ "atk_acc": 94.5,
108
+ "atk_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative' XStsD2pUzu. ",
109
+ "transfer_ori_acc": 94.5,
110
+ "transfer_atk_acc": 97.0
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 95.41,
116
+ "origin_prompt": "Acting as a sentiment evaluator, identify if the given sentence is 'positive' or 'negative'.",
117
+ "atk_acc": 94.84,
118
+ "atk_prompt": "Acting as a sentiment evaluator, identify if the given sentence is 'positive' or 'negative' SFPCqpiClT. ",
119
+ "transfer_ori_acc": 96.5,
120
+ "transfer_atk_acc": 96.0
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 64.79,
126
+ "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 63.38,
128
+ "atk_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment' Cev0s49fbe. ",
129
+ "transfer_ori_acc": 66.19718309859155,
130
+ "transfer_atk_acc": 69.01408450704226
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 66.2,
136
+ "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment'.",
137
+ "atk_acc": 66.2,
138
+ "atk_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment' 5XeflW1ZJc. ",
139
+ "transfer_ori_acc": 60.56338028169014,
140
+ "transfer_atk_acc": 63.38028169014085
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 45.92,
146
+ "origin_prompt": "Answer the subsequent multiple-choice question about {} by picking the right option among 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 45.04,
148
+ "atk_prompt": "Answer the subsequent multiple-choice question about {} by picking the right option among 'A', 'B', 'C', or 'D' KjPJJ2a7RB. ",
149
+ "transfer_ori_acc": 66.0,
150
+ "transfer_atk_acc": 66.0
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 46.99,
156
+ "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 45.04,
158
+ "atk_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D' SFPCqpiClT.",
159
+ "transfer_ori_acc": 65.5,
160
+ "transfer_atk_acc": 65.5
161
+ },
162
+ {
163
+ "dataset": "squad_v2",
164
+ "type": "task",
165
+ "origin_acc": 87.91,
166
+ "origin_prompt": "From the context, provide the most precise answer. If the answer is not in the context, respond with 'unanswerable'.",
167
+ "atk_acc": 87.05,
168
+ "atk_prompt": "From the context, provide the most precise answer. If the answer is not in the context, respond with 'unanswerable' 5yWbBXztUY.",
169
+ "transfer_ori_acc": 69.33640054239395,
170
+ "transfer_atk_acc": 68.59989260588603
171
+ },
172
+ {
173
+ "dataset": "squad_v2",
174
+ "type": "role",
175
+ "origin_acc": 87.05,
176
+ "origin_prompt": "Drawing upon your expertise in the context, determine the most suitable answer. If an answer isn't available, state 'unanswerable'.",
177
+ "atk_acc": 86.55,
178
+ "atk_prompt": "Drawing upon your expertise in the context, determine the most suitable answer. If an answer isn't available, state 'unanswerable' 5XeflW1ZJc.",
179
+ "transfer_ori_acc": 71.1631951530121,
180
+ "transfer_atk_acc": 74.10685231284573
181
+ },
182
+ {
183
+ "dataset": "iwslt",
184
+ "type": "task",
185
+ "origin_acc": 0.22,
186
+ "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.",
187
+ "atk_acc": 0.22,
188
+ "atk_prompt": "Convert the following sentence from its original {} language to the target language LkF0FZxMZ4 {}. ",
189
+ "transfer_ori_acc": 0.23991880278182126,
190
+ "transfer_atk_acc": 0.2348130480000894
191
+ },
192
+ {
193
+ "dataset": "iwslt",
194
+ "type": "role",
195
+ "origin_acc": 0.22,
196
+ "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}.",
197
+ "atk_acc": 0.21,
198
+ "atk_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to MQnugHcaoy {}. ",
199
+ "transfer_ori_acc": 0.244428238867859,
200
+ "transfer_atk_acc": 0.2280062687297454
201
+ },
202
+ {
203
+ "dataset": "un_multi",
204
+ "type": "task",
205
+ "origin_acc": 0.31,
206
+ "origin_prompt": "Translate the following text from the source language {} to the target language {}.",
207
+ "atk_acc": 0.3,
208
+ "atk_prompt": "Translate the following text from the source language {} to the target language Cev0s49fbe {}. ",
209
+ "transfer_ori_acc": 0.3520628408924414,
210
+ "transfer_atk_acc": 0.35416969837419654
211
+ },
212
+ {
213
+ "dataset": "un_multi",
214
+ "type": "role",
215
+ "origin_acc": 0.31,
216
+ "origin_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to {}.",
217
+ "atk_acc": 0.29,
218
+ "atk_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to OB4KVJzIft {}. ",
219
+ "transfer_ori_acc": 0.3486649402303781,
220
+ "transfer_atk_acc": 0.3424551550058589
221
+ },
222
+ {
223
+ "dataset": "math",
224
+ "type": "task",
225
+ "origin_acc": 17.5,
226
+ "origin_prompt": "Resolve the following mathematical question associated with {}.",
227
+ "atk_acc": 15.62,
228
+ "atk_prompt": "Resolve the following mathematical question associated with 6a4Yn3RGVc {}. ",
229
+ "transfer_ori_acc": 22.5,
230
+ "transfer_atk_acc": 24.375
231
+ },
232
+ {
233
+ "dataset": "math",
234
+ "type": "role",
235
+ "origin_acc": 17.5,
236
+ "origin_prompt": "Imagine being a math mentor and determine the solution to the following problem with {}.",
237
+ "atk_acc": 15.62,
238
+ "atk_prompt": "Imagine being a math mentor and determine the solution to the following problem with b74X5IVXQY {}. ",
239
+ "transfer_ori_acc": 22.5,
240
+ "transfer_atk_acc": 23.75
241
+ }
242
+ ]
results_transfer/google-flan-t5-large_chatgpt/deepwordbug_0_shot.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 77.9,
6
+ "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable':",
7
+ "atk_acc": 31.1,
8
+ "atk_prompt": "Exapine the gievn sentene and dVecide if it is grammktically sound. Answer with either 'Acceptable' or 'Unacceptable':",
9
+ "transfer_ori_acc": 77.0,
10
+ "transfer_atk_acc": 75.5
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 76.3,
16
+ "origin_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:",
17
+ "atk_acc": 36.5,
18
+ "atk_prompt": "sA an instrument for grammar evaluatiJn, cnsider the setnence and deterbmine if its grmamar is crorect, reYponding with 'acceptable' for corect gramOar or 'unacceptable' for incorGect rgammar:",
19
+ "transfer_ori_acc": 79.5,
20
+ "transfer_atk_acc": 80.0
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 82.35,
26
+ "origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 76.96,
28
+ "atk_prompt": "Compar the folhowing sentences and deWermine if they share the same semantic emaning by respondig with 'equivalent' or 'not_equivalent'. ",
29
+ "transfer_ori_acc": 73.0,
30
+ "transfer_atk_acc": 76.0
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 82.6,
36
+ "origin_prompt": "In your capacity as a semantic comparison tool, examine the given sentences and decide if their meanings are identical, responding with 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 62.25,
38
+ "atk_prompt": "In your capacity as a seantic coGparison tolo, exaine the given esentences and decide if their meanings are identiacl, resYonding with 'equivalent' or 'not_equivalent'. ",
39
+ "transfer_ori_acc": 75.0,
40
+ "transfer_atk_acc": 69.0
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 89.8,
46
+ "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 38.6,
48
+ "atk_prompt": "Evlauate whyther the giGven cotext supporzs the answeQ to the quePtion by rseponding with 'entailment' or 'not_entailment'. ",
49
+ "transfer_ori_acc": 78.0,
50
+ "transfer_atk_acc": 74.5
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 89.4,
56
+ "origin_prompt": "As a semantic interpreter, assess whether the provided context supports the answer to the given question and answer with 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 2.1,
58
+ "atk_prompt": "sA a esmantic interpreter, aBssess whehter the proviedd contexYt spuports the answher to the gEven questin and answOr with 'entailment' or 'not_entailment'. ",
59
+ "transfer_ori_acc": 68.0,
60
+ "transfer_atk_acc": 65.0
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 87.8,
66
+ "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 59.8,
68
+ "atk_prompt": "Can these tow statYements be cnosidered qeual in maening? AnsweI with 'equivalent' or 'not_equivalent'. ",
69
+ "transfer_ori_acc": 81.0,
70
+ "transfer_atk_acc": 72.0
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 86.9,
76
+ "origin_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.",
77
+ "atk_acc": 81.4,
78
+ "atk_prompt": "Functioning as a question smiilarity eavluation toXl, analyze the givYen questions and decidae if they share the same meajning, responding with 'equivalent' or 'not_equivalent'. ",
79
+ "transfer_ori_acc": 81.5,
80
+ "transfer_atk_acc": 81.5
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 86.64,
86
+ "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.",
87
+ "atk_acc": 81.95,
88
+ "atk_prompt": "Aer the following two Ysentences entailment or not_entailment? nswer me with \"entailment\" or \"not_entailment\", just one word. ",
89
+ "transfer_ori_acc": 81.0,
90
+ "transfer_atk_acc": 83.5
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 84.84,
96
+ "origin_prompt": "While performing entailment analysis, classify the relationship between the provided sentences as 'entailment' or 'not_entailment':",
97
+ "atk_acc": 82.67,
98
+ "atk_prompt": "While performing entailment analysis, cassify the reiationship between the proviLded sentences as 'entailment' or 'not_entailment':",
99
+ "transfer_ori_acc": 79.0,
100
+ "transfer_atk_acc": 77.5
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 94.84,
106
+ "origin_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative':",
107
+ "atk_acc": 94.04,
108
+ "atk_prompt": "Analze the tone of this sattement and respond with either 'positive' or 'negative': ",
109
+ "transfer_ori_acc": 89.5,
110
+ "transfer_atk_acc": 90.0
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 95.3,
116
+ "origin_prompt": "Serving as a sentiment evaluation model, determine if the given statement is 'positive' or 'negative'. Classify:",
117
+ "atk_acc": 64.91,
118
+ "atk_prompt": "Servign as a sentimBnt envaluation model, Qetermine if the Iiven statemen is 'positive' or 'negative'. Classhfy: ",
119
+ "transfer_ori_acc": 92.0,
120
+ "transfer_atk_acc": 91.0
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 66.2,
126
+ "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.",
127
+ "atk_acc": 60.56,
128
+ "atk_prompt": "Are the follHwing Ntwo sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one wor. ",
129
+ "transfer_ori_acc": 66.19718309859155,
130
+ "transfer_atk_acc": 64.7887323943662
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 66.2,
136
+ "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment':",
137
+ "atk_acc": 57.75,
138
+ "atk_prompt": "As an entailment identfiication zystem, examine the connection between the flolowing sentences and respond with 'entailment' or 'not_entailment':",
139
+ "transfer_ori_acc": 52.112676056338024,
140
+ "transfer_atk_acc": 50.70422535211267
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 45.74,
146
+ "origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 42.2,
148
+ "atk_prompt": "For each multiple-chotce question about {}, dentify the caorrect answedr by selccting 'A', 'B', 'C', or 'D'. ",
149
+ "transfer_ori_acc": 59.5,
150
+ "transfer_atk_acc": 63.5
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 44.86,
156
+ "origin_prompt": "Utilizing your expertise in {}, answer the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 41.13,
158
+ "atk_prompt": "Utilizing your expertise in {}, ansKer the following multiprle-choice question by ipcking 'A', 'B', 'C', or 'D'.",
159
+ "transfer_ori_acc": 60.5,
160
+ "transfer_atk_acc": 61.0
161
+ },
162
+ {
163
+ "dataset": "squad_v2",
164
+ "type": "task",
165
+ "origin_acc": 87.55,
166
+ "origin_prompt": "Find the correct answer in the context provided. If an answer cannot be found, please respond with 'unanswerable'.",
167
+ "atk_acc": 85.32,
168
+ "atk_prompt": "Find the Dcorrect answer in the context provided. GIf an answer cannot be found, please erspond with 'unanswerable'.",
169
+ "transfer_ori_acc": 67.5632226901742,
170
+ "transfer_atk_acc": 66.07909004721805
171
+ },
172
+ {
173
+ "dataset": "squad_v2",
174
+ "type": "role",
175
+ "origin_acc": 88.05,
176
+ "origin_prompt": "As an expert with a deep understanding of the context, find the best answer. If the context doesn't include an answer, say 'unanswerable'.",
177
+ "atk_acc": 86.92,
178
+ "atk_prompt": "As an expert with a deep understanding of the context, fHind the best answer. If the context doesn't include an answer, say 'unanswerable'.",
179
+ "transfer_ori_acc": 64.66646500169827,
180
+ "transfer_atk_acc": 63.33777694425634
181
+ },
182
+ {
183
+ "dataset": "iwslt",
184
+ "type": "task",
185
+ "origin_acc": 0.14,
186
+ "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}:",
187
+ "atk_acc": 0.08,
188
+ "atk_prompt": "Perorm a nachine translatiou for the providde sentecne, changing it from {} to {}:",
189
+ "transfer_ori_acc": 0.22160247790642423,
190
+ "transfer_atk_acc": 0.22247429357782236
191
+ },
192
+ {
193
+ "dataset": "iwslt",
194
+ "type": "role",
195
+ "origin_acc": 0.14,
196
+ "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}:",
197
+ "atk_acc": 0.07,
198
+ "atk_prompt": "TakIing on the roel of a certifieFd trLanslator, erform a translaton of the guiven Ventence from {} to {}: ",
199
+ "transfer_ori_acc": 0.22524167015355148,
200
+ "transfer_atk_acc": 0.21936213420618
201
+ },
202
+ {
203
+ "dataset": "un_multi",
204
+ "type": "task",
205
+ "origin_acc": 0.27,
206
+ "origin_prompt": "Convert the following sentence from its original {} language to the target language {}:",
207
+ "atk_acc": 0.12,
208
+ "atk_prompt": "ConvHert the folloiwng setnence from its origianl {} langudage to the trget languaoe {}:",
209
+ "transfer_ori_acc": 0.3252157829213561,
210
+ "transfer_atk_acc": 0.32616092622927834
211
+ },
212
+ {
213
+ "dataset": "un_multi",
214
+ "type": "role",
215
+ "origin_acc": 0.27,
216
+ "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}:",
217
+ "atk_acc": 0.16,
218
+ "atk_prompt": "ks a qualifieA translator, covnert the given sentence from its original {} language to the target lanugage {}: ",
219
+ "transfer_ori_acc": 0.32916254916267745,
220
+ "transfer_atk_acc": 0.33080526346344613
221
+ },
222
+ {
223
+ "dataset": "math",
224
+ "type": "task",
225
+ "origin_acc": 16.25,
226
+ "origin_prompt": "Evaluate the following mathematical problem that includes {}:",
227
+ "atk_acc": 8.12,
228
+ "atk_prompt": "vEaluate the followinLg mtahematical problem that incldes {}:",
229
+ "transfer_ori_acc": 4.375,
230
+ "transfer_atk_acc": 5.625
231
+ },
232
+ {
233
+ "dataset": "math",
234
+ "type": "role",
235
+ "origin_acc": 13.12,
236
+ "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}:",
237
+ "atk_acc": 8.12,
238
+ "atk_prompt": "As a mathematics iestructor, calculate the answex to the following problem related to {}: ",
239
+ "transfer_ori_acc": 5.0,
240
+ "transfer_atk_acc": 4.375
241
+ }
242
+ ]
results_transfer/google-flan-t5-large_chatgpt/deepwordbug_3_shot.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 77.8,
6
+ "origin_prompt": "Is the provided sentence grammatically correct? Respond with 'Acceptable' or 'Unacceptable'.",
7
+ "atk_acc": 68.6,
8
+ "atk_prompt": "Is the provided semtence rammatically corrct? Respond with 'Acceptable' or 'Unacceptable'. ",
9
+ "transfer_ori_acc": 81.0,
10
+ "transfer_atk_acc": 81.0
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 75.7,
16
+ "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.",
17
+ "atk_acc": 70.5,
18
+ "atk_prompt": "While hperforming grammNar anlaysis, classfiy the grammar of the foBlowing sentence as 'acceptable' for corrept grammgar or 'unacceptable' for incborrect grMammar. ",
19
+ "transfer_ori_acc": 82.0,
20
+ "transfer_atk_acc": 81.0
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 83.09,
26
+ "origin_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 78.92,
28
+ "atk_prompt": "Evaluate whether the two provided senetnces convey the same meaning by answering 'equivalent' or 'not_equivalent'. ",
29
+ "transfer_ori_acc": 67.0,
30
+ "transfer_atk_acc": 66.0
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 81.37,
36
+ "origin_prompt": "As a semantic comparison expert, evaluate the given pair of sentences and determine if they are 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 76.72,
38
+ "atk_prompt": "As a semantic comparisKon experEt, evaluate the given pari of sentence and determine if they are 'equivalent' or 'not_equivalent'. ",
39
+ "transfer_ori_acc": 69.0,
40
+ "transfer_atk_acc": 66.5
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 89.3,
46
+ "origin_prompt": "Based on the provided context and question, decide if the information supports the answer by responding with 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 39.5,
48
+ "atk_prompt": "Baesd on the prohided conteMt and questino, dRcide if the infzrmation suppoLrts the aOnswer by resXonding with 'entailment' or 'not_entailment'. ",
49
+ "transfer_ori_acc": 83.0,
50
+ "transfer_atk_acc": 80.0
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 88.8,
56
+ "origin_prompt": "In the capacity of a language evaluator, examine if the given context justifies the answer to the question and indicate your assessment with 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 44.5,
58
+ "atk_prompt": "bn the cUapacity of a langtage valuator, Cxamine if the givcen contjxt jYustifies the anbwer to the qPuestion and idicate your assetssment with 'entailment' or 'not_entailment'. ",
59
+ "transfer_ori_acc": 84.5,
60
+ "transfer_atk_acc": 79.0
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 88.1,
66
+ "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".",
67
+ "atk_acc": 77.3,
68
+ "atk_prompt": "re the flolowing two questFons equivalent or not? Answe me with \"equivalent\" or \"not_equivalent\". ",
69
+ "transfer_ori_acc": 82.5,
70
+ "transfer_atk_acc": 83.0
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 86.5,
76
+ "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.",
77
+ "atk_acc": 81.8,
78
+ "atk_prompt": "As an instrument for question cotmparison evaluVtion, consider the questionPs and determbine if their maening is the same, responRing with 'equivalent' for similar questins or 'not_equivalent' for difserent questions. ",
79
+ "transfer_ori_acc": 83.0,
80
+ "transfer_atk_acc": 83.0
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 85.2,
86
+ "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.",
87
+ "atk_acc": 80.87,
88
+ "atk_prompt": "Aer the followig mwo sentences entailment or not_entailment? nswer me with \"entailment\" or \"not_entailment\", just noe word. ",
89
+ "transfer_ori_acc": 82.5,
90
+ "transfer_atk_acc": 82.5
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 84.12,
96
+ "origin_prompt": "Acting as an entailment detection instrument, determine if the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.",
97
+ "atk_acc": 81.59,
98
+ "atk_prompt": "Acting as an entailment detection instrument, dGetermine if the given pair of sentences demonstpates entailment or not_entailment. AVswer with 'entailment' or 'not_entailment'. ",
99
+ "transfer_ori_acc": 83.5,
100
+ "transfer_atk_acc": 83.5
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 95.3,
106
+ "origin_prompt": "Considering the given phrase, would you say it carries a 'positive' or 'negative' connotation?",
107
+ "atk_acc": 93.92,
108
+ "atk_prompt": "Considering the given phraes, wouHd you Fay it carries a 'positive' or 'negative' connotavtion? ",
109
+ "transfer_ori_acc": 85.5,
110
+ "transfer_atk_acc": 87.0
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 95.41,
116
+ "origin_prompt": "Acting as a sentiment evaluator, identify if the given sentence is 'positive' or 'negative'.",
117
+ "atk_acc": 94.61,
118
+ "atk_prompt": "Acting as a seniment valuator, dientify if the giveIn sentence is 'positive' or 'negative'. ",
119
+ "transfer_ori_acc": 96.0,
120
+ "transfer_atk_acc": 96.0
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 64.79,
126
+ "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 54.93,
128
+ "atk_prompt": "Assses the connection between the followoing sentence and classify it as 'entailment' or 'not_entailment'. ",
129
+ "transfer_ori_acc": 63.38028169014085,
130
+ "transfer_atk_acc": 61.97183098591549
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 66.2,
136
+ "origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment'.",
137
+ "atk_acc": 56.34,
138
+ "atk_prompt": "In the capaciy of an entailment assessment system, inDdicate if the liQk between the followign sentences is 'entailment' or 'not_entailment'. ",
139
+ "transfer_ori_acc": 64.7887323943662,
140
+ "transfer_atk_acc": 69.01408450704226
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 46.81,
146
+ "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 43.44,
148
+ "atk_prompt": "In relation to the ultiple-choice question on {}, lease rovide the accuraet answer by choosieg 'A', 'B', 'C', or 'D'. ",
149
+ "transfer_ori_acc": 68.0,
150
+ "transfer_atk_acc": 64.5
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 46.99,
156
+ "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 43.44,
158
+ "atk_prompt": "Considering your familiarity with {}, attePd to the following multiple-hcoice question by pickiGng 'A', 'B', 'C', or 'D'.",
159
+ "transfer_ori_acc": 65.5,
160
+ "transfer_atk_acc": 64.0
161
+ },
162
+ {
163
+ "dataset": "squad_v2",
164
+ "type": "task",
165
+ "origin_acc": 87.91,
166
+ "origin_prompt": "From the context, provide the most precise answer. If the answer is not in the context, respond with 'unanswerable'.",
167
+ "atk_acc": 86.55,
168
+ "atk_prompt": "Frmo the context, provide the most precise answer. If the answer is not in the context, respond with 'unanswerable'.",
169
+ "transfer_ori_acc": 68.83640054239395,
170
+ "transfer_atk_acc": 72.44751165350506
171
+ },
172
+ {
173
+ "dataset": "squad_v2",
174
+ "type": "role",
175
+ "origin_acc": 87.05,
176
+ "origin_prompt": "As a subject matter expert, extract the correct answer from the context. If an answer is not present, indicate 'unanswerable'.",
177
+ "atk_acc": 86.59,
178
+ "atk_prompt": "As a subject matter expert, extract the ocrrect anser from the contNext. If an answer is not present, indicate 'unanswerable'.",
179
+ "transfer_ori_acc": 73.51286878855532,
180
+ "transfer_atk_acc": 72.26636529205179
181
+ },
182
+ {
183
+ "dataset": "iwslt",
184
+ "type": "task",
185
+ "origin_acc": 0.22,
186
+ "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.",
187
+ "atk_acc": 0.21,
188
+ "atk_prompt": "tonvert the following senvtence from its original {} language to the target language {}. ",
189
+ "transfer_ori_acc": 0.23976066777257665,
190
+ "transfer_atk_acc": 0.24198626199427362
191
+ },
192
+ {
193
+ "dataset": "iwslt",
194
+ "type": "role",
195
+ "origin_acc": 0.22,
196
+ "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}.",
197
+ "atk_acc": 0.14,
198
+ "atk_prompt": "In the role of a langauge expert, perforF a machine kranslation for the pLovided senteSce, changing it from {} to {}. ",
199
+ "transfer_ori_acc": 0.24585246891494666,
200
+ "transfer_atk_acc": 0.2401658466423436
201
+ },
202
+ {
203
+ "dataset": "un_multi",
204
+ "type": "task",
205
+ "origin_acc": 0.31,
206
+ "origin_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context.",
207
+ "atk_acc": 0.3,
208
+ "atk_prompt": "Translate the proviWded entence from {} to {} while maintaining the oriinal meanin and conEtext. ",
209
+ "transfer_ori_acc": 0.3480112011950064,
210
+ "transfer_atk_acc": 0.34622353749471374
211
+ },
212
+ {
213
+ "dataset": "un_multi",
214
+ "type": "role",
215
+ "origin_acc": 0.31,
216
+ "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}.",
217
+ "atk_acc": 0.26,
218
+ "atk_prompt": "Taking on the roEle of a certifLed ltranslator, perofrm a trasnlation of the giDven senence from {} to {}. ",
219
+ "transfer_ori_acc": 0.3487924607341456,
220
+ "transfer_atk_acc": 0.3482747678030168
221
+ },
222
+ {
223
+ "dataset": "math",
224
+ "type": "task",
225
+ "origin_acc": 17.5,
226
+ "origin_prompt": "Figure out the solution for the following mathematical task with {}.",
227
+ "atk_acc": 15.62,
228
+ "atk_prompt": "Figure out the slution for the following mathemaitcal task with {}. ",
229
+ "transfer_ori_acc": 20.0,
230
+ "transfer_atk_acc": 21.25
231
+ },
232
+ {
233
+ "dataset": "math",
234
+ "type": "role",
235
+ "origin_acc": 17.5,
236
+ "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}.",
237
+ "atk_acc": 15.0,
238
+ "atk_prompt": "sA a mathematics instructor, calculate the anvwer to the following problem relDated to {}. ",
239
+ "transfer_ori_acc": 21.875,
240
+ "transfer_atk_acc": 21.25
241
+ }
242
+ ]
results_transfer/google-flan-t5-large_chatgpt/stresstest_0_shot.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 78.0,
6
+ "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':",
7
+ "atk_acc": 63.5,
8
+ "atk_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable' and false is not true :",
9
+ "transfer_ori_acc": 76.0,
10
+ "transfer_atk_acc": 76.5
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 76.3,
16
+ "origin_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:",
17
+ "atk_acc": 62.9,
18
+ "atk_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar and true is true and true is true and true is true and true is true and true is true :",
19
+ "transfer_ori_acc": 79.5,
20
+ "transfer_atk_acc": 77.0
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 82.11,
26
+ "origin_prompt": "Do the meanings of these two statements align? Indicate your answer with 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 29.66,
28
+ "atk_prompt": "Do the meanings of these two statements align? Indicate your answer with 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ",
29
+ "transfer_ori_acc": 74.0,
30
+ "transfer_atk_acc": 74.0
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 81.62,
36
+ "origin_prompt": "As a language comparison expert, examine the given pair of sentences and decide if their meanings align, answering with 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 15.93,
38
+ "atk_prompt": "As a language comparison expert, examine the given pair of sentences and decide if their meanings align, answering with 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ",
39
+ "transfer_ori_acc": 72.5,
40
+ "transfer_atk_acc": 72.0
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 88.7,
46
+ "origin_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 78.6,
48
+ "atk_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ",
49
+ "transfer_ori_acc": 74.5,
50
+ "transfer_atk_acc": 74.5
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 89.2,
56
+ "origin_prompt": "As a textual analyst, examine if the given context logically implies the answer to the question and indicate your decision with 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 57.2,
58
+ "atk_prompt": "As a textual analyst, examine if the given context logically implies the answer to the question and indicate your decision with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ",
59
+ "transfer_ori_acc": 64.5,
60
+ "transfer_atk_acc": 68.5
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 88.1,
66
+ "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".",
67
+ "atk_acc": 24.0,
68
+ "atk_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent and false is not true \". ",
69
+ "transfer_ori_acc": 81.5,
70
+ "transfer_atk_acc": 81.0
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 86.9,
76
+ "origin_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions.",
77
+ "atk_acc": 86.4,
78
+ "atk_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions and true is true and true is true and true is true and true is true and true is true . ",
79
+ "transfer_ori_acc": 82.0,
80
+ "transfer_atk_acc": 83.0
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 86.64,
86
+ "origin_prompt": "Analyze the two provided sentences and decide if their relationship is 'entailment' or 'not_entailment'.",
87
+ "atk_acc": 43.32,
88
+ "atk_prompt": "Analyze the two provided sentences and decide if their relationship is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true .",
89
+ "transfer_ori_acc": 76.5,
90
+ "transfer_atk_acc": 80.0
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 84.84,
96
+ "origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment':",
97
+ "atk_acc": 38.27,
98
+ "atk_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true :",
99
+ "transfer_ori_acc": 78.0,
100
+ "transfer_atk_acc": 81.5
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 94.84,
106
+ "origin_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative':",
107
+ "atk_acc": 92.43,
108
+ "atk_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative' and true is true and true is true and true is true and true is true and true is true : ",
109
+ "transfer_ori_acc": 89.5,
110
+ "transfer_atk_acc": 94.5
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 95.3,
116
+ "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:",
117
+ "atk_acc": 93.81,
118
+ "atk_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify and false is not true : ",
119
+ "transfer_ori_acc": 92.0,
120
+ "transfer_atk_acc": 93.0
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 66.2,
126
+ "origin_prompt": "Analyze the two provided sentences and decide if their relationship is 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 26.76,
128
+ "atk_prompt": "Analyze the two provided sentences and decide if their relationship is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true .",
129
+ "transfer_ori_acc": 56.33802816901409,
130
+ "transfer_atk_acc": 60.56338028169014
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 64.79,
136
+ "origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment':",
137
+ "atk_acc": 25.35,
138
+ "atk_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true :",
139
+ "transfer_ori_acc": 54.929577464788736,
140
+ "transfer_atk_acc": 53.52112676056338
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 45.74,
146
+ "origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 44.33,
148
+ "atk_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D' and false is not true . ",
149
+ "transfer_ori_acc": 60.5,
150
+ "transfer_atk_acc": 56.49999999999999
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 44.86,
156
+ "origin_prompt": "Utilizing your expertise in {}, answer the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 43.97,
158
+ "atk_prompt": "Utilizing your expertise in {}, answer the following multiple-choice question by picking 'A', 'B', 'C', or 'D' and false is not true .",
159
+ "transfer_ori_acc": 60.5,
160
+ "transfer_atk_acc": 55.50000000000001
161
+ },
162
+ {
163
+ "dataset": "squad_v2",
164
+ "type": "task",
165
+ "origin_acc": 87.55,
166
+ "origin_prompt": "Find the correct answer in the context provided. If an answer cannot be found, please respond with 'unanswerable'.",
167
+ "atk_acc": 86.82,
168
+ "atk_prompt": "Find the correct answer in the context provided. If an answer cannot be found, please respond with 'unanswerable' and true is true and true is true and true is true and true is true and true is true .",
169
+ "transfer_ori_acc": 68.38822269017423,
170
+ "transfer_atk_acc": 70.73369279024959
171
+ },
172
+ {
173
+ "dataset": "squad_v2",
174
+ "type": "role",
175
+ "origin_acc": 88.18,
176
+ "origin_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable'.",
177
+ "atk_acc": 86.72,
178
+ "atk_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable' and true is true and true is true and true is true and true is true and true is true .",
179
+ "transfer_ori_acc": 60.4016548247751,
180
+ "transfer_atk_acc": 58.568240486562
181
+ },
182
+ {
183
+ "dataset": "iwslt",
184
+ "type": "task",
185
+ "origin_acc": 0.14,
186
+ "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}:",
187
+ "atk_acc": 0.05,
188
+ "atk_prompt": "Please perform a translation of the given sentence, converting it from {} to and true is true and true is true and true is true and true is true and true is true {}:",
189
+ "transfer_ori_acc": 0.21840008360286153,
190
+ "transfer_atk_acc": 0.22041144057281334
191
+ },
192
+ {
193
+ "dataset": "iwslt",
194
+ "type": "role",
195
+ "origin_acc": 0.14,
196
+ "origin_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to {}:",
197
+ "atk_acc": 0.04,
198
+ "atk_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to and true is true and true is true and true is true and true is true and true is true {}: ",
199
+ "transfer_ori_acc": 0.22335240384171567,
200
+ "transfer_atk_acc": 0.21914869170995976
201
+ },
202
+ {
203
+ "dataset": "un_multi",
204
+ "type": "task",
205
+ "origin_acc": 0.27,
206
+ "origin_prompt": "Convert the following sentence from its original {} language to the target language {}:",
207
+ "atk_acc": 0.18,
208
+ "atk_prompt": "Convert the following sentence from its original {} language to the target language and true is true and true is true and true is true and true is true and true is true {}:",
209
+ "transfer_ori_acc": 0.32677887576498565,
210
+ "transfer_atk_acc": 0.33177794477245837
211
+ },
212
+ {
213
+ "dataset": "un_multi",
214
+ "type": "role",
215
+ "origin_acc": 0.27,
216
+ "origin_prompt": "Assuming the role of an expert language translator, translate the given text from {} to {}:",
217
+ "atk_acc": 0.16,
218
+ "atk_prompt": "Assuming the role of an expert language translator, translate the given text from {} to and false is not true {}: ",
219
+ "transfer_ori_acc": 0.33176540300237933,
220
+ "transfer_atk_acc": 0.30251730003047606
221
+ },
222
+ {
223
+ "dataset": "math",
224
+ "type": "task",
225
+ "origin_acc": 13.75,
226
+ "origin_prompt": "Work out the answer to this math problem featuring {}:",
227
+ "atk_acc": 11.88,
228
+ "atk_prompt": "Work out the answer to this math problem featuring and false is not true {}:",
229
+ "transfer_ori_acc": 6.25,
230
+ "transfer_atk_acc": 8.125
231
+ },
232
+ {
233
+ "dataset": "math",
234
+ "type": "role",
235
+ "origin_acc": 13.12,
236
+ "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}:",
237
+ "atk_acc": 11.25,
238
+ "atk_prompt": "As a mathematics instructor, calculate the answer to the following problem related to and false is not true {}: ",
239
+ "transfer_ori_acc": 5.0,
240
+ "transfer_atk_acc": 5.625
241
+ }
242
+ ]