Spaces:
Runtime error
Runtime error
File size: 12,019 Bytes
683d0f6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 |
[
{
"dataset": "cola",
"type": "task",
"origin_acc": 86.0,
"origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable':",
"atk_acc": 25.3,
"atk_prompt": "find the particular sentence and look if it is grammaticaldefinitely present. responded with neither 'Acceptable' or 'Unacceptable':",
"transfer_ori_acc": 77.9,
"transfer_atk_acc": 44.4
},
{
"dataset": "cola",
"type": "role",
"origin_acc": 86.1,
"origin_prompt": "In your role as a grammar check tool, assess the following sentence and classify it as 'acceptable' if it is grammatically correct or 'unacceptable' if it is incorrect:",
"atk_acc": 30.9,
"atk_prompt": "should your job as a grammar get check, assess the following sentence and classify it as 'acceptable' if it is grammaticalmore improper or 'unacceptable' if it is contradictory:",
"transfer_ori_acc": 75.7,
"transfer_atk_acc": 75.2
},
{
"dataset": "mrpc",
"type": "task",
"origin_acc": 88.24,
"origin_prompt": "In your capacity as a language analyst, assess the following sentences and classify their similarity as 'equivalent' or 'not_equivalent'.",
"atk_acc": 70.1,
"atk_prompt": "during your capacity as a grammatical analytic, calculate the following words and classify their difference as 'equivalent' or 'not_equivalent'. ",
"transfer_ori_acc": 78.18627450980392,
"transfer_atk_acc": 76.47058823529412
},
{
"dataset": "mrpc",
"type": "role",
"origin_acc": 87.25,
"origin_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent'.",
"atk_acc": 34.8,
"atk_prompt": "can the following sentences be considered semanticmore inferior? this reply with 'equivalent' or 'not_equivalent'. ",
"transfer_ori_acc": 80.63725490196079,
"transfer_atk_acc": 79.16666666666666
},
{
"dataset": "qnli",
"type": "task",
"origin_acc": 94.1,
"origin_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment'.",
"atk_acc": 52.7,
"atk_prompt": "Based on the info in the context, decide if the object to the question is wrong by choosing 'entailment' or 'not_entailment'. ",
"transfer_ori_acc": 89.4,
"transfer_atk_acc": 86.8
},
{
"dataset": "qnli",
"type": "role",
"origin_acc": 94.2,
"origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.",
"atk_acc": 42.6,
"atk_prompt": "while a semantic researcher, evaluate positive the supported context returns the answer to the question and reject 'entailment' or 'not_entailment'. ",
"transfer_ori_acc": 89.1,
"transfer_atk_acc": 57.099999999999994
},
{
"dataset": "qqp",
"type": "task",
"origin_acc": 88.2,
"origin_prompt": "Please indicate if the following pair of sentences share the same meaning by responding with 'equivalent' or 'not_equivalent'.",
"atk_acc": 70.8,
"atk_prompt": "Please indicate if the following individual of verbs share the same meaning by auxiliary with 'equivalent' or 'not_equivalent'. ",
"transfer_ori_acc": 87.1,
"transfer_atk_acc": 85.3
},
{
"dataset": "qqp",
"type": "role",
"origin_acc": 88.5,
"origin_prompt": "As a tool for determining question equivalence, review the questions and categorize their similarity as either 'equivalent' or 'not_equivalent'.",
"atk_acc": 70.9,
"atk_prompt": "As a tool for weighing question equivalence, review the data and codeegoassociate their difference as neither 'equivalent' or 'not_equivalent'. ",
"transfer_ori_acc": 86.5,
"transfer_atk_acc": 85.7
},
{
"dataset": "rte",
"type": "task",
"origin_acc": 94.22,
"origin_prompt": "Examine the pair of sentences and determine if they exhibit entailment or not_entailment. Answer with either 'entailment' or 'not_entailment'.",
"atk_acc": 88.45,
"atk_prompt": "examine the parent of sentences and explain if they exhibit entailment or not_entailment. consider with neither 'entailment' or 'not_entailment'.",
"transfer_ori_acc": 84.11552346570397,
"transfer_atk_acc": 84.11552346570397
},
{
"dataset": "rte",
"type": "role",
"origin_acc": 93.86,
"origin_prompt": "Acting as an entailment detection instrument, determine if the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment':",
"atk_acc": 83.03,
"atk_prompt": "Acting as an entailment on instrument, determine if the hit pair of verb appears entailment or not_entailment. answers with 'entailment' or 'not_entailment':",
"transfer_ori_acc": 84.47653429602889,
"transfer_atk_acc": 83.75451263537906
},
{
"dataset": "sst2",
"type": "task",
"origin_acc": 96.44,
"origin_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative':",
"atk_acc": 0.11,
"atk_prompt": "Analyze the topic of this phrase and responding with neither 'positive' or 'negative': ",
"transfer_ori_acc": 95.06880733944955,
"transfer_atk_acc": 88.76146788990825
},
{
"dataset": "sst2",
"type": "role",
"origin_acc": 96.33,
"origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'. Please classify:",
"atk_acc": 43.69,
"atk_prompt": "like a sentiment classifier, identify neither the following text is 'positive' or 'negative'. from submit: ",
"transfer_ori_acc": 94.4954128440367,
"transfer_atk_acc": 94.4954128440367
},
{
"dataset": "wnli",
"type": "task",
"origin_acc": 78.87,
"origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.",
"atk_acc": 71.83,
"atk_prompt": "Are the main two sentences entailment or not_entailment? begin me with \"entailment\" or \"not_entailment\", just the word. ",
"transfer_ori_acc": 66.19718309859155,
"transfer_atk_acc": 61.97183098591549
},
{
"dataset": "wnli",
"type": "role",
"origin_acc": 78.87,
"origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment':",
"atk_acc": 70.42,
"atk_prompt": "In your role as an entailment analysis tool, report the commitment between the and are and classify it as 'entailment' or 'not_entailment':",
"transfer_ori_acc": 56.33802816901409,
"transfer_atk_acc": 57.74647887323944
},
{
"dataset": "mmlu",
"type": "task",
"origin_acc": 52.84,
"origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.",
"atk_acc": 17.38,
"atk_prompt": "not respond to the multiple-response question about {} by choosing the responses correctly: 'A', 'B', 'C', or 'D'. ",
"transfer_ori_acc": 45.0354609929078,
"transfer_atk_acc": 44.32624113475177
},
{
"dataset": "mmlu",
"type": "role",
"origin_acc": 53.19,
"origin_prompt": "As a knowledgeable individual in {}, provide your response to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.",
"atk_acc": 37.94,
"atk_prompt": "not a knowledgeable example in {}, state your response to the multiple-is question by choosing 'A', 'B', 'C', or 'D'.",
"transfer_ori_acc": 44.50354609929078,
"transfer_atk_acc": 44.858156028368796
},
{
"dataset": "squad_v2",
"type": "task",
"origin_acc": 89.88,
"origin_prompt": "Please derive the most fitting answer from the context. If there isn't an answer in the context, respond with 'unanswerable'.",
"atk_acc": 84.86,
"atk_prompt": "wait choose the most fitting number from the context. If there isn't an answer in the context, answer with 'unanswerable'.",
"transfer_ori_acc": 87.64992770387508,
"transfer_atk_acc": 86.45295800690538
},
{
"dataset": "squad_v2",
"type": "role",
"origin_acc": 90.05,
"origin_prompt": "As a well-informed specialist familiar with the context, provide an answer to the question. If the context doesn't contain an answer, reply with 'unanswerable'.",
"atk_acc": 12.09,
"atk_prompt": "a a well-informed answer relevant with the context, provide an question to the question. if the context doesn't contain an inquiry, challenge with 'unanswerable'.",
"transfer_ori_acc": 87.54992770387507,
"transfer_atk_acc": 49.455847510614255
},
{
"dataset": "iwslt",
"type": "task",
"origin_acc": 0.17,
"origin_prompt": "Convert the following sentence from its original {} language to the target language {}:",
"atk_acc": 0.16,
"atk_prompt": "Convert the following sentence from its original {} translation to the current language {}:",
"transfer_ori_acc": 0.13467663417075984,
"transfer_atk_acc": 0.13629374452687212
},
{
"dataset": "iwslt",
"type": "role",
"origin_acc": 0.18,
"origin_prompt": "Acting as a professional translator, translate the provided sentence from {} to {}:",
"atk_acc": 0.15,
"atk_prompt": "Acting as a professional [UNK], translate the displayed sentence from {} to {}: ",
"transfer_ori_acc": 0.13307736590330976,
"transfer_atk_acc": 0.12968577942911605
},
{
"dataset": "un_multi",
"type": "task",
"origin_acc": 0.32,
"origin_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context:",
"atk_acc": 0.3,
"atk_prompt": "Translate the provided sentence from {} to {} while reverse the original flow and context:",
"transfer_ori_acc": 0.2678951646057538,
"transfer_atk_acc": 0.26744304702548155
},
{
"dataset": "un_multi",
"type": "role",
"origin_acc": 0.32,
"origin_prompt": "As a language specialist, accurately translate the provided sentence from {} to {}:",
"atk_acc": 0.3,
"atk_prompt": "a a language specialists, just translate the provided sentence from {} to {}: ",
"transfer_ori_acc": 0.2673389915652739,
"transfer_atk_acc": 0.25734798371295475
},
{
"dataset": "math",
"type": "task",
"origin_acc": 14.37,
"origin_prompt": "Determine the solution to this mathematical problem related to {}:",
"atk_acc": 2.5,
"atk_prompt": "identify the problem to this mathematical problem related to {}:",
"transfer_ori_acc": 10.625,
"transfer_atk_acc": 9.375
},
{
"dataset": "math",
"type": "role",
"origin_acc": 15.0,
"origin_prompt": "In the shoes of a mathematics master, obtain the result for this math question that includes {}:",
"atk_acc": 2.5,
"atk_prompt": "In the shoes of a complexity master, obtain the question for this math question that includes {}: ",
"transfer_ori_acc": 10.0,
"transfer_atk_acc": 6.25
}
] |