March07 committed on
Commit
683d0f6
1 Parent(s): 7c9c0aa

add transferability information

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. app.py +58 -6
  2. results_transfer/chatgpt_google-flan-t5-large/bertattack_0_shot.json +242 -0
  3. results_transfer/chatgpt_google-flan-t5-large/bertattack_3_shot.json +242 -0
  4. results_transfer/chatgpt_google-flan-t5-large/checklist_0_shot.json +242 -0
  5. results_transfer/chatgpt_google-flan-t5-large/checklist_3_shot.json +242 -0
  6. results_transfer/chatgpt_google-flan-t5-large/deepwordbug_0_shot.json +242 -0
  7. results_transfer/chatgpt_google-flan-t5-large/deepwordbug_3_shot.json +242 -0
  8. results_transfer/chatgpt_google-flan-t5-large/stresstest_0_shot.json +242 -0
  9. results_transfer/chatgpt_google-flan-t5-large/stresstest_3_shot.json +242 -0
  10. results_transfer/chatgpt_google-flan-t5-large/textbugger_0_shot.json +242 -0
  11. results_transfer/chatgpt_google-flan-t5-large/textbugger_3_shot.json +242 -0
  12. results_transfer/chatgpt_google-flan-t5-large/textfooler_0_shot.json +242 -0
  13. results_transfer/chatgpt_google-flan-t5-large/textfooler_3_shot.json +242 -0
  14. results_transfer/chatgpt_google-flan-t5-large/translation_0_shot.json +122 -0
  15. results_transfer/chatgpt_google-flan-t5-large/translation_3_shot.json +122 -0
  16. results_transfer/chatgpt_google-flan-ul2/bertattack_0_shot.json +242 -0
  17. results_transfer/chatgpt_google-flan-ul2/bertattack_3_shot.json +242 -0
  18. results_transfer/chatgpt_google-flan-ul2/checklist_0_shot.json +242 -0
  19. results_transfer/chatgpt_google-flan-ul2/checklist_3_shot.json +242 -0
  20. results_transfer/chatgpt_google-flan-ul2/deepwordbug_0_shot.json +242 -0
  21. results_transfer/chatgpt_google-flan-ul2/deepwordbug_3_shot.json +242 -0
  22. results_transfer/chatgpt_google-flan-ul2/stresstest_0_shot.json +242 -0
  23. results_transfer/chatgpt_google-flan-ul2/stresstest_3_shot.json +242 -0
  24. results_transfer/chatgpt_google-flan-ul2/textbugger_0_shot.json +242 -0
  25. results_transfer/chatgpt_google-flan-ul2/textbugger_3_shot.json +242 -0
  26. results_transfer/chatgpt_google-flan-ul2/textfooler_0_shot.json +242 -0
  27. results_transfer/chatgpt_google-flan-ul2/textfooler_3_shot.json +242 -0
  28. results_transfer/chatgpt_google-flan-ul2/translation_0_shot.json +122 -0
  29. results_transfer/chatgpt_google-flan-ul2/translation_3_shot.json +122 -0
  30. results_transfer/chatgpt_vicuna-13b/bertattack_0_shot.json +162 -0
  31. results_transfer/chatgpt_vicuna-13b/bertattack_3_shot.json +162 -0
  32. results_transfer/chatgpt_vicuna-13b/checklist_0_shot.json +162 -0
  33. results_transfer/chatgpt_vicuna-13b/checklist_3_shot.json +162 -0
  34. results_transfer/chatgpt_vicuna-13b/deepwordbug_0_shot.json +162 -0
  35. results_transfer/chatgpt_vicuna-13b/deepwordbug_3_shot.json +162 -0
  36. results_transfer/chatgpt_vicuna-13b/stresstest_0_shot.json +162 -0
  37. results_transfer/chatgpt_vicuna-13b/stresstest_3_shot.json +162 -0
  38. results_transfer/chatgpt_vicuna-13b/textbugger_0_shot.json +162 -0
  39. results_transfer/chatgpt_vicuna-13b/textbugger_3_shot.json +162 -0
  40. results_transfer/chatgpt_vicuna-13b/textfooler_0_shot.json +162 -0
  41. results_transfer/chatgpt_vicuna-13b/textfooler_3_shot.json +162 -0
  42. results_transfer/chatgpt_vicuna-13b/translation_0_shot.json +82 -0
  43. results_transfer/chatgpt_vicuna-13b/translation_3_shot.json +82 -0
  44. results_transfer/google-flan-t5-large_chatgpt/bertattack_0_shot.json +242 -0
  45. results_transfer/google-flan-t5-large_chatgpt/bertattack_3_shot.json +242 -0
  46. results_transfer/google-flan-t5-large_chatgpt/checklist_0_shot.json +242 -0
  47. results_transfer/google-flan-t5-large_chatgpt/checklist_3_shot.json +242 -0
  48. results_transfer/google-flan-t5-large_chatgpt/deepwordbug_0_shot.json +242 -0
  49. results_transfer/google-flan-t5-large_chatgpt/deepwordbug_3_shot.json +242 -0
  50. results_transfer/google-flan-t5-large_chatgpt/stresstest_0_shot.json +242 -0
app.py CHANGED
@@ -1,9 +1,19 @@
1
- import streamlit as st
2
  from parse import retrieve
 
3
 
4
-
5
- def main():
6
- st.title("PromptBench")
 
 
 
 
 
 
 
 
 
7
 
8
  model_name = st.selectbox(
9
  "Select Model",
@@ -47,5 +57,47 @@ def main():
47
  st.write("Attack prompt: {}".format(result["attack prompt"]))
48
  st.write("Attack acc: {}".format(result["attack acc"]))
49
 
50
- if __name__ == "__main__":
51
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
  from parse import retrieve
3
+ from transfer import retrieve_transfer
4
 
5
def main():
    """Entry point: show a sidebar selector and render the chosen page.

    The sidebar offers two functions; whichever one is selected has its
    page-rendering function called for this script run.
    """
    # Map each sidebar option to the function that renders its page.
    # Dict insertion order is the order in which the options are shown.
    pages = {
        "PromptBench": promptbench,
        "Retrieve Transferability Information": retrieve_transferability_information,
    }

    st.sidebar.title("Choose Function")
    function_choice = st.sidebar.radio("", list(pages))

    render_page = pages.get(function_choice)
    if render_page is not None:
        render_page()
14
+
15
+ def promptbench():
16
+ st.title("PromptBench")
17
 
18
  model_name = st.selectbox(
19
  "Select Model",
 
57
  st.write("Attack prompt: {}".format(result["attack prompt"]))
58
  st.write("Attack acc: {}".format(result["attack acc"]))
59
 
60
+
61
def retrieve_transferability_information():
    """Render the "Retrieve Transferability Information" page.

    Lets the user pick a source model, a target model, an attack and a
    shot count, then calls ``retrieve_transfer`` with those selections and
    writes one group of lines per returned record: dataset, prompt type,
    original and attacked prompts, and the accuracies on the source and
    target models.

    If the source and target models are the same, a message is shown and
    nothing is retrieved.
    """
    st.title("Retrieve Transferability Information")

    # Both selectors offer the same set of models.
    model_options = ["T5", "Vicuna", "UL2", "ChatGPT"]

    source_model_name = st.selectbox(
        "Select Source Model",
        options=model_options,
        index=0,
    )

    target_model_name = st.selectbox(
        "Select Target Model",
        options=model_options,
        index=0,
    )

    if source_model_name == target_model_name:
        st.write("Source model and target model cannot be the same.")
        return

    attack_name = st.selectbox(
        "Select Attack",
        options=[
            "BertAttack", "CheckList", "DeepWordBug", "StressTest", "TextFooler", "TextBugger", "Semantic"
        ],
        index=0,
    )

    shot = st.selectbox(
        "Select Shot",
        options=[0, 3],
        index=0,
    )

    # NOTE(review): retrieve_transfer is defined in transfer.py (not visible
    # here); presumably it returns a list of dict records -- confirm.
    data = retrieve_transfer(source_model_name, target_model_name, attack_name, shot)
    if not data:
        # Nothing to display (empty or missing result); say so explicitly
        # instead of rendering a blank page.
        st.write("No transferability results found for this selection.")
        return

    for d in data:
        # The result files store the target model's clean accuracy under
        # "transfer_ori_acc"; "transfer_origin_acc" is still accepted in case
        # retrieve_transfer renames the key. A KeyError is raised only if
        # neither key is present.
        if "transfer_origin_acc" in d:
            transfer_origin_acc = d["transfer_origin_acc"]
        else:
            transfer_origin_acc = d["transfer_ori_acc"]

        st.write(f"Dataset: {d['dataset']}")
        st.write(f"Prompt Type: {d['type']}-oriented")
        st.write(f"Origin prompt: {d['origin_prompt']}")
        st.write(f"Attack prompt: {d['atk_prompt']}")
        st.write(f"Source model: origin acc: {d['origin_acc']}, attack acc: {d['atk_acc']}")
        st.write(f"Target model: origin acc: {transfer_origin_acc}, attack acc: {d['transfer_atk_acc']}")
101
+
102
+ if __name__ == "__main__":
103
+ main()
results_transfer/chatgpt_google-flan-t5-large/bertattack_0_shot.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 78.5,
6
+ "origin_prompt": "Assess the grammatical structure of the given sentence and classify it as 'Acceptable' or 'Unacceptable':",
7
+ "atk_acc": 0.0,
8
+ "atk_prompt": "what the grammatical structure of the given sentence and consider it as 'Acceptable' or 'Unacceptable':",
9
+ "transfer_ori_acc": 75.5,
10
+ "transfer_atk_acc": 76.3
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 80.0,
16
+ "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:",
17
+ "atk_acc": 0.0,
18
+ "atk_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for so grammar or 'unacceptable' for abnormal grammar:",
19
+ "transfer_ori_acc": 75.5,
20
+ "transfer_atk_acc": 73.3
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 75.5,
26
+ "origin_prompt": "Examine the pair of sentences and decide if their meanings are identical by answering with 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 56.0,
28
+ "atk_prompt": "Examine the pairs of pronouns and evaluate if their meanings are different by answering with 'equivalent' or 'not_equivalent'. ",
29
+ "transfer_ori_acc": 82.1078431372549,
30
+ "transfer_atk_acc": 71.81372549019608
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 75.5,
36
+ "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 0.0,
38
+ "atk_prompt": "In your capacity as a semantic assessment tool, evaluate the provided tense and classify their constituents as 'equivalent' or 'not_equivalent'. ",
39
+ "transfer_ori_acc": 76.47058823529412,
40
+ "transfer_atk_acc": 79.41176470588235
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 79.0,
46
+ "origin_prompt": "Assess if the answer to the question can be logically concluded from the provided context by choosing 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 0.0,
48
+ "atk_prompt": "and if the answer to the question can be logically concluded from the provided context by avoiding 'entailment' or 'not_entailment'. ",
49
+ "transfer_ori_acc": 84.8,
50
+ "transfer_atk_acc": 31.5
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 79.5,
56
+ "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 0.0,
58
+ "atk_prompt": "As a textual inference expert, analysis if the answer to the question can be deduced from the provided citation and define 'entailment' or 'not_entailment'. ",
59
+ "transfer_ori_acc": 78.10000000000001,
60
+ "transfer_atk_acc": 60.5
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 79.5,
66
+ "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 1.0,
68
+ "atk_prompt": "Can these two constructions be considered equal in meaning? object with 'equivalent' or 'not_equivalent'. ",
69
+ "transfer_ori_acc": 87.8,
70
+ "transfer_atk_acc": 68.89999999999999
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 82.5,
76
+ "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.",
77
+ "atk_acc": 0.0,
78
+ "atk_prompt": "give an table for category comparison evaluation, consider the questions and determine if their concept is the same, responding with 'equivalent' for similarity questions or 'not_equivalent' for different knowledge. ",
79
+ "transfer_ori_acc": 87.3,
80
+ "transfer_atk_acc": 87.4
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 82.5,
86
+ "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.",
87
+ "atk_acc": 55.5,
88
+ "atk_prompt": "determine if the following singular of sentence indicating entailment or not_entailment. contrast with 'entailment' or 'not_entailment'.",
89
+ "transfer_ori_acc": 84.11552346570397,
90
+ "transfer_atk_acc": 82.31046931407943
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 81.5,
96
+ "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':",
97
+ "atk_acc": 37.5,
98
+ "atk_prompt": "at as an entailment classifier, know which the given type of verb displays entailment or not_entailment. object with 'entailment' or 'not_entailment':",
99
+ "transfer_ori_acc": 83.03249097472924,
100
+ "transfer_atk_acc": 81.2274368231047
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 90.0,
106
+ "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment:",
107
+ "atk_acc": 0.0,
108
+ "atk_prompt": "what this assertion and are whether it has a 'positive' or 'negative' sentiment: ",
109
+ "transfer_ori_acc": 95.06880733944955,
110
+ "transfer_atk_acc": 94.95412844036697
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 93.0,
116
+ "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:",
117
+ "atk_acc": 76.0,
118
+ "atk_prompt": "from as a sentiments identification tool, assess if the same word is 'positive' or 'negative'. function: ",
119
+ "transfer_ori_acc": 94.83944954128441,
120
+ "transfer_atk_acc": 93.92201834862385
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 69.01,
126
+ "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 47.89,
128
+ "atk_prompt": "Identify whether the given individual of verbs demonstrates entailment or not_entailment. have with 'entailment' or 'not_entailment'.",
129
+ "transfer_ori_acc": 59.154929577464785,
130
+ "transfer_atk_acc": 52.112676056338024
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 60.56,
136
+ "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':",
137
+ "atk_acc": 47.89,
138
+ "atk_prompt": "Working as an entailment classifier, identify whether the given corpus of verbs displays entailment or not_entailment. act with 'entailment' or 'not_entailment':",
139
+ "transfer_ori_acc": 57.74647887323944,
140
+ "transfer_atk_acc": 53.52112676056338
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 63.5,
146
+ "origin_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 14.5,
148
+ "atk_prompt": "1 the first multiple-choice question on {}, remove the following answer from the options 'A', 'B', 'C', or 'D'. ",
149
+ "transfer_ori_acc": 44.32624113475177,
150
+ "transfer_atk_acc": 44.148936170212764
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 62.0,
156
+ "origin_prompt": "Drawing upon your mastery of {}, please answer the multiple-choice question by selecting the correct option from 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 1.0,
158
+ "atk_prompt": "Drawing upon your mastery of {}, simply question the multiple-selecting question by selecting the wrong color from 'A', 'B', 'C', or 'D'.",
159
+ "transfer_ori_acc": 44.32624113475177,
160
+ "transfer_atk_acc": 25.354609929078016
161
+ },
162
+ {
163
+ "dataset": "squad_v2",
164
+ "type": "task",
165
+ "origin_acc": 72.47,
166
+ "origin_prompt": "Discover the best answer based on the context. If the context doesn't include an answer, respond with 'unanswerable'.",
167
+ "atk_acc": 2.76,
168
+ "atk_prompt": "challenge the best answer based on the context. however the context doesn't include an answer, play with 'unanswerable'.",
169
+ "transfer_ori_acc": 87.54992770387507,
170
+ "transfer_atk_acc": 87.64992770387508
171
+ },
172
+ {
173
+ "dataset": "squad_v2",
174
+ "type": "role",
175
+ "origin_acc": 65.63,
176
+ "origin_prompt": "Drawing on your expertise in the context, provide the most precise answer. If the answer is not in the context, respond with 'unanswerable'.",
177
+ "atk_acc": 1.86,
178
+ "atk_prompt": "Drawing on your technique in the frame, offer the most precise answer. saying the answer is not in the context, play with 'unanswerable'.",
179
+ "transfer_ori_acc": 88.04992770387507,
180
+ "transfer_atk_acc": 87.92265497660236
181
+ },
182
+ {
183
+ "dataset": "iwslt",
184
+ "type": "task",
185
+ "origin_acc": 0.22,
186
+ "origin_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context:",
187
+ "atk_acc": 0.08,
188
+ "atk_prompt": "Translate the designated sentence from {} to {} while defending the assumed meaning and context:",
189
+ "transfer_ori_acc": 0.12847010019593297,
190
+ "transfer_atk_acc": 0.13300283959211967
191
+ },
192
+ {
193
+ "dataset": "iwslt",
194
+ "type": "role",
195
+ "origin_acc": 0.22,
196
+ "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}:",
197
+ "atk_acc": 0.01,
198
+ "atk_prompt": "who on the role of a certified translated, request a translator of the defined sentence from {} to {}: ",
199
+ "transfer_ori_acc": 0.14266566803921382,
200
+ "transfer_atk_acc": 0.14227772806172803
201
+ },
202
+ {
203
+ "dataset": "un_multi",
204
+ "type": "task",
205
+ "origin_acc": 0.34,
206
+ "origin_prompt": "Translate the following text from the source language {} to the target language {}:",
207
+ "atk_acc": 0.18,
208
+ "atk_prompt": "Translate the by text from the source language {} to the english example {}:",
209
+ "transfer_ori_acc": 0.2655722295717415,
210
+ "transfer_atk_acc": 0.26924943953069924
211
+ },
212
+ {
213
+ "dataset": "un_multi",
214
+ "type": "role",
215
+ "origin_acc": 0.34,
216
+ "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}:",
217
+ "atk_acc": 0.08,
218
+ "atk_prompt": "of the role of a language expertise, define a machine and for the presented sentence, changing it from {} to {}: ",
219
+ "transfer_ori_acc": 0.26923226069781947,
220
+ "transfer_atk_acc": 0.26037565925935324
221
+ },
222
+ {
223
+ "dataset": "math",
224
+ "type": "task",
225
+ "origin_acc": 6.25,
226
+ "origin_prompt": "Resolve the following mathematical question associated with {}:",
227
+ "atk_acc": 0.62,
228
+ "atk_prompt": "Resolve the following mathematical answer along with {}:",
229
+ "transfer_ori_acc": 9.375,
230
+ "transfer_atk_acc": 13.750000000000002
231
+ },
232
+ {
233
+ "dataset": "math",
234
+ "type": "role",
235
+ "origin_acc": 5.0,
236
+ "origin_prompt": "In the position of a mathematics coach, figure out the answer to this math challenge involving {}:",
237
+ "atk_acc": 0.0,
238
+ "atk_prompt": "In the position of a mathematics coach, develop out the theme to this math challenge involving {}: ",
239
+ "transfer_ori_acc": 13.750000000000002,
240
+ "transfer_atk_acc": 4.375
241
+ }
242
+ ]
results_transfer/chatgpt_google-flan-t5-large/bertattack_3_shot.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 81.0,
6
+ "origin_prompt": "Analyze the provided sentence and classify its grammatical correctness as 'Acceptable' or 'Unacceptable'.",
7
+ "atk_acc": 59.0,
8
+ "atk_prompt": "Analyze the reported speech and classify its inclusive correcting as 'Acceptable' or 'Unacceptable'. ",
9
+ "transfer_ori_acc": 77.10000000000001,
10
+ "transfer_atk_acc": 74.1
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 81.0,
16
+ "origin_prompt": "In the role of a syntax analyzer, examine the grammar of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones.",
17
+ "atk_acc": 75.5,
18
+ "atk_prompt": "using the art of a level analyzeg, examine the grammar of the provided sentence and decide if it is genuine, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones. ",
19
+ "transfer_ori_acc": 74.2,
20
+ "transfer_atk_acc": 74.0
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 74.0,
26
+ "origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 47.5,
28
+ "atk_prompt": "Compare the following sentences and determine if they lack the same aspect marking by respectively with 'equivalent' or 'not_equivalent'. ",
29
+ "transfer_ori_acc": 80.3921568627451,
30
+ "transfer_atk_acc": 81.61764705882352
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 71.5,
36
+ "origin_prompt": "In the role of a textual comparison specialist, examine the given sentences and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 47.0,
38
+ "atk_prompt": "have the role of a textual comparison academic, examine the given sentences and determine if they share the same length, responding with 'equivalent' or 'not_equivalent'. ",
39
+ "transfer_ori_acc": 79.65686274509804,
40
+ "transfer_atk_acc": 76.22549019607843
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 84.0,
46
+ "origin_prompt": "Consider the context and question, and indicate if the answer can be logically deduced from the context by responding with 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 71.0,
48
+ "atk_prompt": "put the contexts and question, and be if the answer can be logically deduced from the question by stating with 'entailment' or 'not_entailment'. ",
49
+ "transfer_ori_acc": 85.39999999999999,
50
+ "transfer_atk_acc": 78.9
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 84.0,
56
+ "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 70.0,
58
+ "atk_prompt": "As a structural inference expert, analyze if the answer to the challenge can be deduced from the query audience and select 'entailment' or 'not_entailment'. ",
59
+ "transfer_ori_acc": 77.2,
60
+ "transfer_atk_acc": 82.8
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 82.5,
66
+ "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".",
67
+ "atk_acc": 78.5,
68
+ "atk_prompt": "Are the following versus answers equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\". ",
69
+ "transfer_ori_acc": 87.9,
70
+ "transfer_atk_acc": 87.7
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 83.5,
76
+ "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.",
77
+ "atk_acc": 78.0,
78
+ "atk_prompt": "and an instrument for question comparison analysis, consider the questions and establish if their topic is the same, responding with 'equivalent' for matching responses or 'not_equivalent' for between questions. ",
79
+ "transfer_ori_acc": 86.4,
80
+ "transfer_atk_acc": 86.5
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 85.0,
86
+ "origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.",
87
+ "atk_acc": 59.5,
88
+ "atk_prompt": "write the second statements and categorize their relationship as neither 'entailment' or 'not_entailment'. ",
89
+ "transfer_ori_acc": 84.83754512635379,
90
+ "transfer_atk_acc": 84.47653429602889
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 83.5,
96
+ "origin_prompt": "While performing entailment analysis, classify the relationship between the provided sentences as 'entailment' or 'not_entailment'.",
97
+ "atk_acc": 79.0,
98
+ "atk_prompt": "was performing entailment analysis, classify the relationship between the first phrase as 'entailment' or 'not_entailment'. ",
99
+ "transfer_ori_acc": 83.39350180505414,
100
+ "transfer_atk_acc": 83.39350180505414
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 95.5,
106
+ "origin_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative'.",
107
+ "atk_acc": 94.5,
108
+ "atk_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative' Q47Jmd4lMV. ",
109
+ "transfer_ori_acc": 93.69266055045871,
110
+ "transfer_atk_acc": 93.92201834862385
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 97.5,
116
+ "origin_prompt": "In the capacity of a sentiment classifier, decide whether the given quote is 'positive' or 'negative'.",
117
+ "atk_acc": 96.5,
118
+ "atk_prompt": "In the capacity of a sentiment classifier, decide whether the given quote is 'positive' or 'negative' 9Q3S1F94fE. ",
119
+ "transfer_ori_acc": 94.95412844036697,
120
+ "transfer_atk_acc": 94.95412844036697
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 69.01,
126
+ "origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 59.15,
128
+ "atk_prompt": "examine the paired actions and categorize their relationship as is 'entailment' or 'not_entailment'. ",
129
+ "transfer_ori_acc": 63.38028169014085,
130
+ "transfer_atk_acc": 66.19718309859155
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 66.2,
136
+ "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.",
137
+ "atk_acc": 56.34,
138
+ "atk_prompt": "In the role of a semantic relationship analyzer, define the connection between the given links and decide if they display entailment or not_entailment. Answer with 'entailment' or 'not_entailment'. ",
139
+ "transfer_ori_acc": 56.33802816901409,
140
+ "transfer_atk_acc": 66.19718309859155
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 66.5,
146
+ "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 11.5,
148
+ "atk_prompt": "at respond to the multiple-independent question about {} by selecting the inappropriate question: 'A', 'B', 'C', or 'D'. ",
149
+ "transfer_ori_acc": 45.21276595744681,
150
+ "transfer_atk_acc": 33.51063829787234
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 65.5,
156
+ "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 52.5,
158
+ "atk_prompt": "all your ability with {}, attend to the following multiple-select question by picking 'A', 'B', 'C', or 'D'.",
159
+ "transfer_ori_acc": 46.98581560283688,
160
+ "transfer_atk_acc": 45.92198581560284
161
+ },
162
+ {
163
+ "dataset": "squad_v2",
164
+ "type": "task",
165
+ "origin_acc": 72.25,
166
+ "origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, reply with 'unanswerable'.",
167
+ "atk_acc": 65.81,
168
+ "atk_prompt": "Based on the use context, provide the best possible answer. who there'tis no answer provided in the area, reply with 'unanswerable'.",
169
+ "transfer_ori_acc": 87.04992770387507,
170
+ "transfer_atk_acc": 87.54992770387507
171
+ },
172
+ {
173
+ "dataset": "squad_v2",
174
+ "type": "role",
175
+ "origin_acc": 72.45,
176
+ "origin_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable'.",
177
+ "atk_acc": 61.02,
178
+ "atk_prompt": "As a proficient expert in the given context, try for the most specific out. If the question lacks be search, end by as 'unanswerable'.",
179
+ "transfer_ori_acc": 87.31659437054175,
180
+ "transfer_atk_acc": 87.68023073417811
181
+ },
182
+ {
183
+ "dataset": "iwslt",
184
+ "type": "task",
185
+ "origin_acc": 0.24,
186
+ "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.",
187
+ "atk_acc": 0.21,
188
+ "atk_prompt": "Convert the following sentence from its actual {} language to the net language {}. ",
189
+ "transfer_ori_acc": 0.2240211126734209,
190
+ "transfer_atk_acc": 0.22296244610321242
191
+ },
192
+ {
193
+ "dataset": "iwslt",
194
+ "type": "role",
195
+ "origin_acc": 0.24,
196
+ "origin_prompt": "Assuming the responsibilities of a professional translator, translate the subsequent text passage from {} to {}.",
197
+ "atk_acc": 0.17,
198
+ "atk_prompt": "what the responsibilities of a professional scholar, translate the subsequent text passage from {} to {}. ",
199
+ "transfer_ori_acc": 0.22030199520456395,
200
+ "transfer_atk_acc": 0.22001437607394084
201
+ },
202
+ {
203
+ "dataset": "un_multi",
204
+ "type": "task",
205
+ "origin_acc": 0.36,
206
+ "origin_prompt": "Translate the following text from the source language {} to the target language {}.",
207
+ "atk_acc": 0.35,
208
+ "atk_prompt": "Translate the following area from the standard language {} to the target language {}. ",
209
+ "transfer_ori_acc": 0.3090347352409074,
210
+ "transfer_atk_acc": 0.30893566132252137
211
+ },
212
+ {
213
+ "dataset": "un_multi",
214
+ "type": "role",
215
+ "origin_acc": 0.35,
216
+ "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}.",
217
+ "atk_acc": 0.26,
218
+ "atk_prompt": "As a qualified proper, converts the target sentence from its original {} language to the english language {}. ",
219
+ "transfer_ori_acc": 0.31127691224033316,
220
+ "transfer_atk_acc": 0.306608167162218
221
+ },
222
+ {
223
+ "dataset": "math",
224
+ "type": "task",
225
+ "origin_acc": 22.5,
226
+ "origin_prompt": "Compute the result of this math task concerning {}.",
227
+ "atk_acc": 3.12,
228
+ "atk_prompt": "Compute the degree of this math task concerning {}. ",
229
+ "transfer_ori_acc": 16.25,
230
+ "transfer_atk_acc": 16.875
231
+ },
232
+ {
233
+ "dataset": "math",
234
+ "type": "role",
235
+ "origin_acc": 22.5,
236
+ "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}.",
237
+ "atk_acc": 7.5,
238
+ "atk_prompt": "as a mathematics instructor, evaluate the reply to the following problem and to {}. ",
239
+ "transfer_ori_acc": 17.5,
240
+ "transfer_atk_acc": 16.25
241
+ }
242
+ ]
results_transfer/chatgpt_google-flan-t5-large/checklist_0_shot.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 78.5,
6
+ "origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable':",
7
+ "atk_acc": 72.5,
8
+ "atk_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable' zexKUTWJYG:",
9
+ "transfer_ori_acc": 76.5,
10
+ "transfer_atk_acc": 75.5
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 79.0,
16
+ "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:",
17
+ "atk_acc": 45.0,
18
+ "atk_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar zexKUTWJYG:",
19
+ "transfer_ori_acc": 75.5,
20
+ "transfer_atk_acc": 74.1
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 75.5,
26
+ "origin_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 72.0,
28
+ "atk_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent' SvAp8RlOFn. ",
29
+ "transfer_ori_acc": 81.86274509803921,
30
+ "transfer_atk_acc": 82.35294117647058
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 75.5,
36
+ "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 73.5,
38
+ "atk_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent' Kw6nrs57gH. ",
39
+ "transfer_ori_acc": 76.47058823529412,
40
+ "transfer_atk_acc": 74.75490196078431
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 78.0,
46
+ "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 75.5,
48
+ "atk_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment' ItGDrrA1Sc. ",
49
+ "transfer_ori_acc": 90.4,
50
+ "transfer_atk_acc": 89.5
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 78.0,
56
+ "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 74.5,
58
+ "atk_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment' 6a4Yn3RGVc. ",
59
+ "transfer_ori_acc": 89.1,
60
+ "transfer_atk_acc": 88.7
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 81.5,
66
+ "origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 78.0,
68
+ "atk_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent' iia2yL9Uzm. ",
69
+ "transfer_ori_acc": 87.6,
70
+ "transfer_atk_acc": 87.7
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 83.0,
76
+ "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.",
77
+ "atk_acc": 80.5,
78
+ "atk_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions Cev0s49fbe. ",
79
+ "transfer_ori_acc": 87.3,
80
+ "transfer_atk_acc": 87.5
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 82.5,
86
+ "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.",
87
+ "atk_acc": 80.0,
88
+ "atk_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment' MSAddJ4D2a.",
89
+ "transfer_ori_acc": 84.11552346570397,
90
+ "transfer_atk_acc": 84.11552346570397
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 81.5,
96
+ "origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':",
97
+ "atk_acc": 80.0,
98
+ "atk_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment' 6a4Yn3RGVc:",
99
+ "transfer_ori_acc": 85.5595667870036,
100
+ "transfer_atk_acc": 85.92057761732852
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 93.0,
106
+ "origin_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative':",
107
+ "atk_acc": 90.0,
108
+ "atk_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative' AMsRIKZniY: ",
109
+ "transfer_ori_acc": 94.26605504587155,
110
+ "transfer_atk_acc": 94.38073394495413
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 92.5,
116
+ "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:",
117
+ "atk_acc": 92.0,
118
+ "atk_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify Cev0s49fbe: ",
119
+ "transfer_ori_acc": 94.83944954128441,
120
+ "transfer_atk_acc": 94.83944954128441
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 64.79,
126
+ "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.",
127
+ "atk_acc": 59.15,
128
+ "atk_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word zexKUTWJYG. ",
129
+ "transfer_ori_acc": 66.19718309859155,
130
+ "transfer_atk_acc": 66.19718309859155
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 63.38,
136
+ "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':",
137
+ "atk_acc": 57.75,
138
+ "atk_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment' 5XeflW1ZJc:",
139
+ "transfer_ori_acc": 54.929577464788736,
140
+ "transfer_atk_acc": 53.52112676056338
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 64.0,
146
+ "origin_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 58.0,
148
+ "atk_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D' Pbl3tYuoRo. ",
149
+ "transfer_ori_acc": 44.32624113475177,
150
+ "transfer_atk_acc": 44.50354609929078
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 61.5,
156
+ "origin_prompt": "As a skilled person in the field of {}, please respond to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 58.0,
158
+ "atk_prompt": "As a skilled person in the field of {}, please respond to the multiple-choice question by choosing 'A', 'B', 'C', or 'D' IhuBIhoPGc.",
159
+ "transfer_ori_acc": 44.680851063829785,
160
+ "transfer_atk_acc": 45.0354609929078
161
+ },
162
+ {
163
+ "dataset": "squad_v2",
164
+ "type": "task",
165
+ "origin_acc": 69.16,
166
+ "origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, respond with 'unanswerable'.",
167
+ "atk_acc": 64.99,
168
+ "atk_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, respond with 'unanswerable' ItGDrrA1Sc.",
169
+ "transfer_ori_acc": 87.04992770387507,
170
+ "transfer_atk_acc": 87.54992770387507
171
+ },
172
+ {
173
+ "dataset": "squad_v2",
174
+ "type": "role",
175
+ "origin_acc": 64.31,
176
+ "origin_prompt": "As an expert with a deep understanding of the context, find the best answer. If the context doesn't include an answer, say 'unanswerable'.",
177
+ "atk_acc": 60.2,
178
+ "atk_prompt": "As an expert with a deep understanding of the context, find the best answer. If the context doesn't include an answer, say 'unanswerable' d6ZQ3u0GBQ.",
179
+ "transfer_ori_acc": 88.04992770387507,
180
+ "transfer_atk_acc": 88.04992770387507
181
+ },
182
+ {
183
+ "dataset": "iwslt",
184
+ "type": "task",
185
+ "origin_acc": 0.22,
186
+ "origin_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context:",
187
+ "atk_acc": 0.21,
188
+ "atk_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context zq0DcZ5dnI:",
189
+ "transfer_ori_acc": 0.12847010019593297,
190
+ "transfer_atk_acc": 0.13452105947739143
191
+ },
192
+ {
193
+ "dataset": "iwslt",
194
+ "type": "role",
195
+ "origin_acc": 0.22,
196
+ "origin_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to {}:",
197
+ "atk_acc": 0.21,
198
+ "atk_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to SFPCqpiClT {}: ",
199
+ "transfer_ori_acc": 0.13904950536673938,
200
+ "transfer_atk_acc": 0.12181412545498613
201
+ },
202
+ {
203
+ "dataset": "un_multi",
204
+ "type": "task",
205
+ "origin_acc": 0.34,
206
+ "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}:",
207
+ "atk_acc": 0.33,
208
+ "atk_prompt": "Perform a machine translation for the provided sentence, changing it from {} to Kw6nrs57gH {}:",
209
+ "transfer_ori_acc": 0.26814405340158265,
210
+ "transfer_atk_acc": 0.26255854470098927
211
+ },
212
+ {
213
+ "dataset": "un_multi",
214
+ "type": "role",
215
+ "origin_acc": 0.34,
216
+ "origin_prompt": "While acting as an experienced language expert, translate the following text from {} to {}:",
217
+ "atk_acc": 0.33,
218
+ "atk_prompt": "While acting as an experienced language expert, translate the following text from {} to Kw6nrs57gH {}: ",
219
+ "transfer_ori_acc": 0.26489202618745367,
220
+ "transfer_atk_acc": 0.25410191629878426
221
+ },
222
+ {
223
+ "dataset": "math",
224
+ "type": "task",
225
+ "origin_acc": 6.88,
226
+ "origin_prompt": "Calculate the answer to the following math query about {}:",
227
+ "atk_acc": 5.0,
228
+ "atk_prompt": "Calculate the answer to the following math query about 6a4Yn3RGVc {}:",
229
+ "transfer_ori_acc": 13.125,
230
+ "transfer_atk_acc": 11.875
231
+ },
232
+ {
233
+ "dataset": "math",
234
+ "type": "role",
235
+ "origin_acc": 5.0,
236
+ "origin_prompt": "Embodying a mathematics advisor, find the answer to the following math problem with {}:",
237
+ "atk_acc": 2.5,
238
+ "atk_prompt": "Embodying a mathematics advisor, find the answer to the following math problem with 9vGXgnbWB8 {}: ",
239
+ "transfer_ori_acc": 11.25,
240
+ "transfer_atk_acc": 12.5
241
+ }
242
+ ]
results_transfer/chatgpt_google-flan-t5-large/checklist_3_shot.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 81.0,
6
+ "origin_prompt": "Examine the sentence and decide if its grammar is 'Acceptable' or 'Unacceptable'.",
7
+ "atk_acc": 78.0,
8
+ "atk_prompt": "Examine the sentence and decide if its grammar is 'Acceptable' or 'Unacceptable' 9Q3S1F94fE. ",
9
+ "transfer_ori_acc": 76.3,
10
+ "transfer_atk_acc": 76.6
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 81.5,
16
+ "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable'.",
17
+ "atk_acc": 79.5,
18
+ "atk_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable' 6a4Yn3RGVc. ",
19
+ "transfer_ori_acc": 75.2,
20
+ "transfer_atk_acc": 76.3
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 74.0,
26
+ "origin_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 68.5,
28
+ "atk_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent' LkF0FZxMZ4. ",
29
+ "transfer_ori_acc": 82.84313725490196,
30
+ "transfer_atk_acc": 82.59803921568627
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 72.0,
36
+ "origin_prompt": "As a linguistic comparator, review the following pair of sentences and determine their semantic equivalence by choosing 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 68.0,
38
+ "atk_prompt": "As a linguistic comparator, review the following pair of sentences and determine their semantic equivalence by choosing 'equivalent' or 'not_equivalent' bhrRSokrfa. ",
39
+ "transfer_ori_acc": 79.90196078431373,
40
+ "transfer_atk_acc": 80.63725490196079
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 84.5,
46
+ "origin_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 80.5,
48
+ "atk_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment' 9Q3S1F94fE. ",
49
+ "transfer_ori_acc": 86.5,
50
+ "transfer_atk_acc": 86.5
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 84.5,
56
+ "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 81.0,
58
+ "atk_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment' YHQiDfFhw4. ",
59
+ "transfer_ori_acc": 77.2,
60
+ "transfer_atk_acc": 74.2
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 82.0,
66
+ "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 79.0,
68
+ "atk_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent' xikCjkMydH. ",
69
+ "transfer_ori_acc": 87.6,
70
+ "transfer_atk_acc": 87.8
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 83.0,
76
+ "origin_prompt": "In the capacity of a question assessment system, indicate if the meaning of the provided questions is the same, responding with 'equivalent' or 'not_equivalent'.",
77
+ "atk_acc": 81.5,
78
+ "atk_prompt": "In the capacity of a question assessment system, indicate if the meaning of the provided questions is the same, responding with 'equivalent' or 'not_equivalent' 5XeflW1ZJc. ",
79
+ "transfer_ori_acc": 86.1,
80
+ "transfer_atk_acc": 85.9
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 84.5,
86
+ "origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.",
87
+ "atk_acc": 80.5,
88
+ "atk_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment' 5XeflW1ZJc. ",
89
+ "transfer_ori_acc": 84.83754512635379,
90
+ "transfer_atk_acc": 85.5595667870036
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 84.0,
96
+ "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.",
97
+ "atk_acc": 80.5,
98
+ "atk_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment' bhrRSokrfa. ",
99
+ "transfer_ori_acc": 83.03249097472924,
100
+ "transfer_atk_acc": 83.03249097472924
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 96.0,
106
+ "origin_prompt": "Read the provided excerpt and choose between 'positive' and 'negative' to describe its sentiment.",
107
+ "atk_acc": 77.5,
108
+ "atk_prompt": "go the read excerpt and be between 'positive' and 'negative' to describing its mood. ",
109
+ "transfer_ori_acc": 94.72477064220183,
110
+ "transfer_atk_acc": 94.61009174311926
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 97.5,
116
+ "origin_prompt": "In the capacity of a sentiment classifier, decide whether the given quote is 'positive' or 'negative'.",
117
+ "atk_acc": 13.5,
118
+ "atk_prompt": "in the capacity of a sentiment classizer, chose neither the said quotes is 'positive' or 'negative'. ",
119
+ "transfer_ori_acc": 94.95412844036697,
120
+ "transfer_atk_acc": 94.95412844036697
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 69.01,
126
+ "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 61.97,
128
+ "atk_prompt": "Indicate if the connection between the followping senZences is 'entailment' or 'not_entailment'. ",
129
+ "transfer_ori_acc": 64.7887323943662,
130
+ "transfer_atk_acc": 69.01408450704226
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 66.2,
136
+ "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.",
137
+ "atk_acc": 57.75,
138
+ "atk_prompt": "In your role as an entailment analysis tool, asress the rlationship between the given sentences and classify it as 'entailment' or 'not_entailment'. ",
139
+ "transfer_ori_acc": 57.74647887323944,
140
+ "transfer_atk_acc": 54.929577464788736
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 66.0,
146
+ "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 63.0,
148
+ "atk_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D' MQnugHcaoy. ",
149
+ "transfer_ori_acc": 45.21276595744681,
150
+ "transfer_atk_acc": 45.21276595744681
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 65.5,
156
+ "origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 63.0,
158
+ "atk_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D' mOJ9nEwT4f.",
159
+ "transfer_ori_acc": 46.63120567375886,
160
+ "transfer_atk_acc": 46.98581560283688
161
+ },
162
+ {
163
+ "dataset": "squad_v2",
164
+ "type": "task",
165
+ "origin_acc": 73.25,
166
+ "origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, reply with 'unanswerable'.",
167
+ "atk_acc": 71.09,
168
+ "atk_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, reply with 'unanswerable' ItGDrrA1Sc.",
169
+ "transfer_ori_acc": 87.04992770387507,
170
+ "transfer_atk_acc": 87.04992770387507
171
+ },
172
+ {
173
+ "dataset": "squad_v2",
174
+ "type": "role",
175
+ "origin_acc": 72.63,
176
+ "origin_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable'.",
177
+ "atk_acc": 63.07,
178
+ "atk_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable' XStsD2pUzu.",
179
+ "transfer_ori_acc": 87.31659437054175,
180
+ "transfer_atk_acc": 87.31659437054175
181
+ },
182
+ {
183
+ "dataset": "iwslt",
184
+ "type": "task",
185
+ "origin_acc": 0.24,
186
+ "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.",
187
+ "atk_acc": 0.22,
188
+ "atk_prompt": "Convert the following sentence from its original {} language to the target language 5XeflW1ZJc {}. ",
189
+ "transfer_ori_acc": 0.2240211126734209,
190
+ "transfer_atk_acc": 0.22156249809369105
191
+ },
192
+ {
193
+ "dataset": "iwslt",
194
+ "type": "role",
195
+ "origin_acc": 0.24,
196
+ "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}.",
197
+ "atk_acc": 0.23,
198
+ "atk_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to MQnugHcaoy {}. ",
199
+ "transfer_ori_acc": 0.22070346732323043,
200
+ "transfer_atk_acc": 0.2112357399871226
201
+ },
202
+ {
203
+ "dataset": "un_multi",
204
+ "type": "task",
205
+ "origin_acc": 0.35,
206
+ "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}.",
207
+ "atk_acc": 0.34,
208
+ "atk_prompt": "Perform a machine translation for the provided sentence, changing it from {} to Kw6nrs57gH {}. ",
209
+ "transfer_ori_acc": 0.30870734220870066,
210
+ "transfer_atk_acc": 0.3125050772206132
211
+ },
212
+ {
213
+ "dataset": "un_multi",
214
+ "type": "role",
215
+ "origin_acc": 0.35,
216
+ "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}.",
217
+ "atk_acc": 0.31,
218
+ "atk_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language 5XeflW1ZJc {}. ",
219
+ "transfer_ori_acc": 0.31127691224033316,
220
+ "transfer_atk_acc": 0.31337641480593154
221
+ },
222
+ {
223
+ "dataset": "math",
224
+ "type": "task",
225
+ "origin_acc": 23.12,
226
+ "origin_prompt": "Calculate the answer to the following math query about {}.",
227
+ "atk_acc": 19.38,
228
+ "atk_prompt": "Calculate the answer to the following math query about 5XeflW1ZJc {}. ",
229
+ "transfer_ori_acc": 16.25,
230
+ "transfer_atk_acc": 17.5
231
+ },
232
+ {
233
+ "dataset": "math",
234
+ "type": "role",
235
+ "origin_acc": 22.5,
236
+ "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}.",
237
+ "atk_acc": 21.25,
238
+ "atk_prompt": "As a mathematics instructor, calculate the answer to the following problem related to AMsRIKZniY {}. ",
239
+ "transfer_ori_acc": 17.5,
240
+ "transfer_atk_acc": 17.5
241
+ }
242
+ ]
results_transfer/chatgpt_google-flan-t5-large/deepwordbug_0_shot.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "dataset": "cola",
4
+ "type": "task",
5
+ "origin_acc": 79.0,
6
+ "origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable':",
7
+ "atk_acc": 74.5,
8
+ "atk_prompt": "Review the sentece below and identify whether its grammar is 'Acceptable' or 'Unacceptable':",
9
+ "transfer_ori_acc": 76.5,
10
+ "transfer_atk_acc": 73.4
11
+ },
12
+ {
13
+ "dataset": "cola",
14
+ "type": "role",
15
+ "origin_acc": 79.5,
16
+ "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:",
17
+ "atk_acc": 0.0,
18
+ "atk_prompt": "nhile performing grammr analysis, classify the rgammar of the fllowing sentenZe as 'acceptable' for corFect Xrammar or 'unacceptable' for incorrect gramUar:",
19
+ "transfer_ori_acc": 75.5,
20
+ "transfer_atk_acc": 72.5
21
+ },
22
+ {
23
+ "dataset": "mrpc",
24
+ "type": "task",
25
+ "origin_acc": 74.0,
26
+ "origin_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent'.",
27
+ "atk_acc": 70.0,
28
+ "atk_prompt": "Evaluate whether the two provRided sentnces convey the same meaning by answering 'equivalent' or 'not_equivalent'. ",
29
+ "transfer_ori_acc": 81.86274509803921,
30
+ "transfer_atk_acc": 78.43137254901961
31
+ },
32
+ {
33
+ "dataset": "mrpc",
34
+ "type": "role",
35
+ "origin_acc": 76.0,
36
+ "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.",
37
+ "atk_acc": 53.0,
38
+ "atk_prompt": "nI your capacity as a sematnic asseJssment tol, evaluate the prvoided senRtences and classify their meainngs as 'equivalent' or 'not_equivalent'. ",
39
+ "transfer_ori_acc": 76.47058823529412,
40
+ "transfer_atk_acc": 78.67647058823529
41
+ },
42
+ {
43
+ "dataset": "qnli",
44
+ "type": "task",
45
+ "origin_acc": 78.0,
46
+ "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.",
47
+ "atk_acc": 50.0,
48
+ "atk_prompt": "Exaluate whetxher the iven contet Supports the anwser to the queston by respEonding with 'entailment' or 'not_entailment'. ",
49
+ "transfer_ori_acc": 90.4,
50
+ "transfer_atk_acc": 50.1
51
+ },
52
+ {
53
+ "dataset": "qnli",
54
+ "type": "role",
55
+ "origin_acc": 79.5,
56
+ "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.",
57
+ "atk_acc": 19.5,
58
+ "atk_prompt": "As a textula inference expert, aInalyze if the answer to the question can be deducud from the provided context and Melect 'entailment' or 'not_entailment'. ",
59
+ "transfer_ori_acc": 78.10000000000001,
60
+ "transfer_atk_acc": 81.6
61
+ },
62
+ {
63
+ "dataset": "qqp",
64
+ "type": "task",
65
+ "origin_acc": 82.0,
66
+ "origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.",
67
+ "atk_acc": 45.0,
68
+ "atk_prompt": "Assess whethdr the followng sLtatements are identocal in meuaning by answerinJ 'equivalent' or 'not_equivalent'. ",
69
+ "transfer_ori_acc": 87.6,
70
+ "transfer_atk_acc": 85.0
71
+ },
72
+ {
73
+ "dataset": "qqp",
74
+ "type": "role",
75
+ "origin_acc": 82.5,
76
+ "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.",
77
+ "atk_acc": 79.5,
78
+ "atk_prompt": "Ao an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions. ",
79
+ "transfer_ori_acc": 87.3,
80
+ "transfer_atk_acc": 86.9
81
+ },
82
+ {
83
+ "dataset": "rte",
84
+ "type": "task",
85
+ "origin_acc": 83.5,
86
+ "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.",
87
+ "atk_acc": 79.0,
88
+ "atk_prompt": "ArYe the folowing two sentenes entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word. ",
89
+ "transfer_ori_acc": 88.08664259927798,
90
+ "transfer_atk_acc": 83.39350180505414
91
+ },
92
+ {
93
+ "dataset": "rte",
94
+ "type": "role",
95
+ "origin_acc": 80.5,
96
+ "origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':",
97
+ "atk_acc": 78.5,
98
+ "atk_prompt": "IAs an instrument for entailment evaluatioRn, consider the otwo sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':",
99
+ "transfer_ori_acc": 85.5595667870036,
100
+ "transfer_atk_acc": 85.92057761732852
101
+ },
102
+ {
103
+ "dataset": "sst2",
104
+ "type": "task",
105
+ "origin_acc": 93.0,
106
+ "origin_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative':",
107
+ "atk_acc": 88.5,
108
+ "atk_prompt": "Deetrmine the overall sentLment of this sentence, categorizing it as 'positive' or 'negative': ",
109
+ "transfer_ori_acc": 94.26605504587155,
110
+ "transfer_atk_acc": 94.72477064220183
111
+ },
112
+ {
113
+ "dataset": "sst2",
114
+ "type": "role",
115
+ "origin_acc": 93.0,
116
+ "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:",
117
+ "atk_acc": 85.0,
118
+ "atk_prompt": "uFnctioning as a sentmient identification tool, assess if the folGowing expression is 'positive' or 'negative'. Classify: ",
119
+ "transfer_ori_acc": 94.83944954128441,
120
+ "transfer_atk_acc": 93.34862385321101
121
+ },
122
+ {
123
+ "dataset": "wnli",
124
+ "type": "task",
125
+ "origin_acc": 66.2,
126
+ "origin_prompt": "Examine the pair of sentences and determine if they exhibit entailment or not_entailment. Answer with either 'entailment' or 'not_entailment'.",
127
+ "atk_acc": 53.52,
128
+ "atk_prompt": "Examine the Bair of snetences and determine if they exhibit entailment or not_entailment. Ancwer with eMther 'entailment' or 'not_entailment'.",
129
+ "transfer_ori_acc": 61.97183098591549,
130
+ "transfer_atk_acc": 63.38028169014085
131
+ },
132
+ {
133
+ "dataset": "wnli",
134
+ "type": "role",
135
+ "origin_acc": 63.38,
136
+ "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':",
137
+ "atk_acc": 53.52,
138
+ "atk_prompt": "IQn the role of a semantic relationship analyzer, eamine the connection between the given sentences and decide if they exhbit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':",
139
+ "transfer_ori_acc": 54.929577464788736,
140
+ "transfer_atk_acc": 54.929577464788736
141
+ },
142
+ {
143
+ "dataset": "mmlu",
144
+ "type": "task",
145
+ "origin_acc": 62.5,
146
+ "origin_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D'.",
147
+ "atk_acc": 60.0,
148
+ "atk_prompt": "Regarding the following multiple-choice question on {}, pick the correct answeWr from the options 'A', 'B', 'C', or 'D'. ",
149
+ "transfer_ori_acc": 44.32624113475177,
150
+ "transfer_atk_acc": 44.680851063829785
151
+ },
152
+ {
153
+ "dataset": "mmlu",
154
+ "type": "role",
155
+ "origin_acc": 62.5,
156
+ "origin_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.",
157
+ "atk_acc": 59.0,
158
+ "atk_prompt": "With your knowledge of {}, tackle the follQwing multiple-choice question by choosing 'A', 'B', 'C', or 'D'.",
159
+ "transfer_ori_acc": 43.97163120567376,
160
+ "transfer_atk_acc": 43.08510638297872
161
+ },
162
+ {
163
+ "dataset": "squad_v2",
164
+ "type": "task",
165
+ "origin_acc": 71.87,
166
+ "origin_prompt": "Discover the best answer based on the context. If the context doesn't include an answer, respond with 'unanswerable'.",
167
+ "atk_acc": 56.61,
168
+ "atk_prompt": "DiscoveMr the best answr bassd on the Rontext. Iof the cnotext doesn't include an answer, resCpond with 'unanswerable'.",
169
+ "transfer_ori_acc": 87.54992770387507,
170
+ "transfer_atk_acc": 87.08932164326902
171
+ },
172
+ {
173
+ "dataset": "squad_v2",
174
+ "type": "role",
175
+ "origin_acc": 64.5,
176
+ "origin_prompt": "As an expert with a deep understanding of the context, find the best answer. If the context doesn't include an answer, say 'unanswerable'.",
177
+ "atk_acc": 36.52,
178
+ "atk_prompt": "zs an exprt with a deep understanding of the context, find the best answr. If the context doesn't include an anCswer, asy 'unanswerable'.",
179
+ "transfer_ori_acc": 88.04992770387507,
180
+ "transfer_atk_acc": 87.08932164326902
181
+ },
182
+ {
183
+ "dataset": "iwslt",
184
+ "type": "task",
185
+ "origin_acc": 0.22,
186
+ "origin_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context:",
187
+ "atk_acc": 0.21,
188
+ "atk_prompt": "Translate the provideMd sentence from {} to {} while mainOaining the original meaning and context:",
189
+ "transfer_ori_acc": 0.12847010019593297,
190
+ "transfer_atk_acc": 0.13470031324495513
191
+ },
192
+ {
193
+ "dataset": "iwslt",
194
+ "type": "role",
195
+ "origin_acc": 0.22,
196
+ "origin_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to {}:",
197
+ "atk_acc": 0.21,
198
+ "atk_prompt": "In the capacity of a proficient tanslator, plnase change the following senlence from {} to {}: ",
199
+ "transfer_ori_acc": 0.13904950536673938,
200
+ "transfer_atk_acc": 0.1211819424209054
201
+ },
202
+ {
203
+ "dataset": "un_multi",
204
+ "type": "task",
205
+ "origin_acc": 0.34,
206
+ "origin_prompt": "Translate the following text from the source language {} to the target language {}:",
207
+ "atk_acc": 0.33,
208
+ "atk_prompt": "Translate the following text from the source language {} to the Ptarget langugae {}:",
209
+ "transfer_ori_acc": 0.2655722295717415,
210
+ "transfer_atk_acc": 0.24955021119352877
211
+ },
212
+ {
213
+ "dataset": "un_multi",
214
+ "type": "role",
215
+ "origin_acc": 0.34,
216
+ "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}:",
217
+ "atk_acc": 0.32,
218
+ "atk_prompt": "In the role of a language expert, pErform a macRhine yranslation for the provided sentence, cZanging it from {} to {}: ",
219
+ "transfer_ori_acc": 0.26923226069781947,
220
+ "transfer_atk_acc": 0.2529020268653232
221
+ },
222
+ {
223
+ "dataset": "math",
224
+ "type": "task",
225
+ "origin_acc": 6.88,
226
+ "origin_prompt": "Resolve the following mathematical question associated with {}:",
227
+ "atk_acc": 5.62,
228
+ "atk_prompt": "Resolve the following mathematical quXestion assocated with {}:",
229
+ "transfer_ori_acc": 9.375,
230
+ "transfer_atk_acc": 11.875
231
+ },
232
+ {
233
+ "dataset": "math",
234
+ "type": "role",
235
+ "origin_acc": 5.0,
236
+ "origin_prompt": "Embodying a mathematics advisor, find the answer to the following math problem with {}:",
237
+ "atk_acc": 2.5,
238
+ "atk_prompt": "Embodying a mathematics advisor, ind the anwer to the fpollowing math problem with {}: ",
239
+ "transfer_ori_acc": 11.25,
240
+ "transfer_atk_acc": 12.5
241
+ }
242
+ ]
results_transfer/chatgpt_google-flan-t5-large/deepwordbug_3_shot.json ADDED
@@ -0,0 +1,242 @@