Spaces:
Runtime error
Runtime error
[ | |
{ | |
"dataset": "cola", | |
"type": "task", | |
"origin_acc": 78.5, | |
"origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable':", | |
"atk_acc": 72.5, | |
"atk_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable' zexKUTWJYG:", | |
"transfer_ori_acc": 28.9, | |
"transfer_atk_acc": 22.7 | |
}, | |
{ | |
"dataset": "cola", | |
"type": "role", | |
"origin_acc": 79.0, | |
"origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", | |
"atk_acc": 45.0, | |
"atk_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar zexKUTWJYG:", | |
"transfer_ori_acc": 27.0, | |
"transfer_atk_acc": 38.6 | |
}, | |
{ | |
"dataset": "mrpc", | |
"type": "task", | |
"origin_acc": 75.5, | |
"origin_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent'.", | |
"atk_acc": 72.0, | |
"atk_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent' SvAp8RlOFn. ", | |
"transfer_ori_acc": 48.77450980392157, | |
"transfer_atk_acc": 0.49019607843137253 | |
}, | |
{ | |
"dataset": "mrpc", | |
"type": "role", | |
"origin_acc": 75.5, | |
"origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.", | |
"atk_acc": 73.5, | |
"atk_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent' Kw6nrs57gH. ", | |
"transfer_ori_acc": 48.03921568627451, | |
"transfer_atk_acc": 56.372549019607845 | |
}, | |
{ | |
"dataset": "qnli", | |
"type": "task", | |
"origin_acc": 78.0, | |
"origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.", | |
"atk_acc": 75.5, | |
"atk_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment' ItGDrrA1Sc. ", | |
"transfer_ori_acc": 26.5, | |
"transfer_atk_acc": 16.5 | |
}, | |
{ | |
"dataset": "qnli", | |
"type": "role", | |
"origin_acc": 78.0, | |
"origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", | |
"atk_acc": 74.5, | |
"atk_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment' 6a4Yn3RGVc. ", | |
"transfer_ori_acc": 13.4, | |
"transfer_atk_acc": 11.200000000000001 | |
}, | |
{ | |
"dataset": "qqp", | |
"type": "task", | |
"origin_acc": 81.5, | |
"origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.", | |
"atk_acc": 78.0, | |
"atk_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent' iia2yL9Uzm. ", | |
"transfer_ori_acc": 30.2, | |
"transfer_atk_acc": 26.900000000000002 | |
}, | |
{ | |
"dataset": "qqp", | |
"type": "role", | |
"origin_acc": 83.0, | |
"origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", | |
"atk_acc": 80.5, | |
"atk_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions Cev0s49fbe. ", | |
"transfer_ori_acc": 26.200000000000003, | |
"transfer_atk_acc": 24.4 | |
}, | |
{ | |
"dataset": "rte", | |
"type": "task", | |
"origin_acc": 82.5, | |
"origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", | |
"atk_acc": 80.0, | |
"atk_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment' MSAddJ4D2a.", | |
"transfer_ori_acc": 41.15523465703971, | |
"transfer_atk_acc": 20.577617328519857 | |
}, | |
{ | |
"dataset": "rte", | |
"type": "role", | |
"origin_acc": 81.5, | |
"origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':", | |
"atk_acc": 80.0, | |
"atk_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment' 6a4Yn3RGVc:", | |
"transfer_ori_acc": 24.90974729241877, | |
"transfer_atk_acc": 6.859205776173286 | |
}, | |
{ | |
"dataset": "sst2", | |
"type": "task", | |
"origin_acc": 93.0, | |
"origin_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative':", | |
"atk_acc": 90.0, | |
"atk_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative' AMsRIKZniY: ", | |
"transfer_ori_acc": 14.3348623853211, | |
"transfer_atk_acc": 6.8807339449541285 | |
}, | |
{ | |
"dataset": "sst2", | |
"type": "role", | |
"origin_acc": 92.5, | |
"origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:", | |
"atk_acc": 92.0, | |
"atk_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify Cev0s49fbe: ", | |
"transfer_ori_acc": 38.646788990825684, | |
"transfer_atk_acc": 33.48623853211009 | |
}, | |
{ | |
"dataset": "wnli", | |
"type": "task", | |
"origin_acc": 64.79, | |
"origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", | |
"atk_acc": 59.15, | |
"atk_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word zexKUTWJYG. ", | |
"transfer_ori_acc": 25.352112676056336, | |
"transfer_atk_acc": 2.8169014084507045 | |
}, | |
{ | |
"dataset": "wnli", | |
"type": "role", | |
"origin_acc": 63.38, | |
"origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':", | |
"atk_acc": 57.75, | |
"atk_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment' 5XeflW1ZJc:", | |
"transfer_ori_acc": 43.66197183098591, | |
"transfer_atk_acc": 29.577464788732392 | |
}, | |
{ | |
"dataset": "mmlu", | |
"type": "task", | |
"origin_acc": 64.0, | |
"origin_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D'.", | |
"atk_acc": 58.0, | |
"atk_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D' Pbl3tYuoRo. ", | |
"transfer_ori_acc": 18.439716312056735, | |
"transfer_atk_acc": 23.22695035460993 | |
}, | |
{ | |
"dataset": "mmlu", | |
"type": "role", | |
"origin_acc": 61.5, | |
"origin_prompt": "As a skilled person in the field of {}, please respond to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", | |
"atk_acc": 58.0, | |
"atk_prompt": "As a skilled person in the field of {}, please respond to the multiple-choice question by choosing 'A', 'B', 'C', or 'D' IhuBIhoPGc.", | |
"transfer_ori_acc": 20.0354609929078, | |
"transfer_atk_acc": 18.085106382978726 | |
} | |
] |