Spaces:
Runtime error
Runtime error
[ | |
{ | |
"dataset": "cola", | |
"type": "task", | |
"origin_acc": 78.5, | |
"origin_prompt": "Assess the grammatical structure of the given sentence and classify it as 'Acceptable' or 'Unacceptable':", | |
"atk_acc": 0.0, | |
"atk_prompt": "what the grammatical structure of the given sentence and consider it as 'Acceptable' or 'Unacceptable':", | |
"transfer_ori_acc": 50.5, | |
"transfer_atk_acc": 38.3 | |
}, | |
{ | |
"dataset": "cola", | |
"type": "role", | |
"origin_acc": 80.0, | |
"origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", | |
"atk_acc": 0.0, | |
"atk_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for so grammar or 'unacceptable' for abnormal grammar:", | |
"transfer_ori_acc": 27.0, | |
"transfer_atk_acc": 30.7 | |
}, | |
{ | |
"dataset": "mrpc", | |
"type": "task", | |
"origin_acc": 75.5, | |
"origin_prompt": "Examine the pair of sentences and decide if their meanings are identical by answering with 'equivalent' or 'not_equivalent'.", | |
"atk_acc": 56.0, | |
"atk_prompt": "Examine the pairs of pronouns and evaluate if their meanings are different by answering with 'equivalent' or 'not_equivalent'. ", | |
"transfer_ori_acc": 64.2156862745098, | |
"transfer_atk_acc": 28.186274509803923 | |
}, | |
{ | |
"dataset": "mrpc", | |
"type": "role", | |
"origin_acc": 75.5, | |
"origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.", | |
"atk_acc": 0.0, | |
"atk_prompt": "In your capacity as a semantic assessment tool, evaluate the provided tense and classify their constituents as 'equivalent' or 'not_equivalent'. ", | |
"transfer_ori_acc": 48.03921568627451, | |
"transfer_atk_acc": 36.029411764705884 | |
}, | |
{ | |
"dataset": "qnli", | |
"type": "task", | |
"origin_acc": 79.0, | |
"origin_prompt": "Assess if the answer to the question can be logically concluded from the provided context by choosing 'entailment' or 'not_entailment'.", | |
"atk_acc": 0.0, | |
"atk_prompt": "and if the answer to the question can be logically concluded from the provided context by avoiding 'entailment' or 'not_entailment'. ", | |
"transfer_ori_acc": 19.7, | |
"transfer_atk_acc": 2.5 | |
}, | |
{ | |
"dataset": "qnli", | |
"type": "role", | |
"origin_acc": 79.5, | |
"origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.", | |
"atk_acc": 0.0, | |
"atk_prompt": "As a textual inference expert, analysis if the answer to the question can be deduced from the provided citation and define 'entailment' or 'not_entailment'. ", | |
"transfer_ori_acc": 32.5, | |
"transfer_atk_acc": 12.9 | |
}, | |
{ | |
"dataset": "qqp", | |
"type": "task", | |
"origin_acc": 79.5, | |
"origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", | |
"atk_acc": 1.0, | |
"atk_prompt": "Can these two constructions be considered equal in meaning? object with 'equivalent' or 'not_equivalent'. ", | |
"transfer_ori_acc": 22.900000000000002, | |
"transfer_atk_acc": 2.3 | |
}, | |
{ | |
"dataset": "qqp", | |
"type": "role", | |
"origin_acc": 82.5, | |
"origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", | |
"atk_acc": 0.0, | |
"atk_prompt": "give an table for category comparison evaluation, consider the questions and determine if their concept is the same, responding with 'equivalent' for similarity questions or 'not_equivalent' for different knowledge. ", | |
"transfer_ori_acc": 26.200000000000003, | |
"transfer_atk_acc": 6.2 | |
}, | |
{ | |
"dataset": "rte", | |
"type": "task", | |
"origin_acc": 82.5, | |
"origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", | |
"atk_acc": 55.5, | |
"atk_prompt": "determine if the following singular of sentence indicating entailment or not_entailment. contrast with 'entailment' or 'not_entailment'.", | |
"transfer_ori_acc": 41.15523465703971, | |
"transfer_atk_acc": 0.36101083032490977 | |
}, | |
{ | |
"dataset": "rte", | |
"type": "role", | |
"origin_acc": 81.5, | |
"origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':", | |
"atk_acc": 37.5, | |
"atk_prompt": "at as an entailment classifier, know which the given type of verb displays entailment or not_entailment. object with 'entailment' or 'not_entailment':", | |
"transfer_ori_acc": 49.81949458483754, | |
"transfer_atk_acc": 33.2129963898917 | |
}, | |
{ | |
"dataset": "sst2", | |
"type": "task", | |
"origin_acc": 90.0, | |
"origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment:", | |
"atk_acc": 0.0, | |
"atk_prompt": "what this assertion and are whether it has a 'positive' or 'negative' sentiment: ", | |
"transfer_ori_acc": 42.54587155963303, | |
"transfer_atk_acc": 27.75229357798165 | |
}, | |
{ | |
"dataset": "sst2", | |
"type": "role", | |
"origin_acc": 93.0, | |
"origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:", | |
"atk_acc": 76.0, | |
"atk_prompt": "from as a sentiments identification tool, assess if the same word is 'positive' or 'negative'. function: ", | |
"transfer_ori_acc": 38.646788990825684, | |
"transfer_atk_acc": 19.495412844036696 | |
}, | |
{ | |
"dataset": "wnli", | |
"type": "task", | |
"origin_acc": 69.01, | |
"origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", | |
"atk_acc": 47.89, | |
"atk_prompt": "Identify whether the given individual of verbs demonstrates entailment or not_entailment. have with 'entailment' or 'not_entailment'.", | |
"transfer_ori_acc": 33.80281690140845, | |
"transfer_atk_acc": 2.8169014084507045 | |
}, | |
{ | |
"dataset": "wnli", | |
"type": "role", | |
"origin_acc": 60.56, | |
"origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':", | |
"atk_acc": 47.89, | |
"atk_prompt": "Working as an entailment classifier, identify whether the given corpus of verbs displays entailment or not_entailment. act with 'entailment' or 'not_entailment':", | |
"transfer_ori_acc": 43.66197183098591, | |
"transfer_atk_acc": 19.718309859154928 | |
}, | |
{ | |
"dataset": "mmlu", | |
"type": "task", | |
"origin_acc": 63.5, | |
"origin_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D'.", | |
"atk_acc": 14.5, | |
"atk_prompt": "1 the first multiple-choice question on {}, remove the following answer from the options 'A', 'B', 'C', or 'D'. ", | |
"transfer_ori_acc": 18.439716312056735, | |
"transfer_atk_acc": 31.914893617021278 | |
}, | |
{ | |
"dataset": "mmlu", | |
"type": "role", | |
"origin_acc": 62.0, | |
"origin_prompt": "Drawing upon your mastery of {}, please answer the multiple-choice question by selecting the correct option from 'A', 'B', 'C', or 'D'.", | |
"atk_acc": 1.0, | |
"atk_prompt": "Drawing upon your mastery of {}, simply question the multiple-selecting question by selecting the wrong color from 'A', 'B', 'C', or 'D'.", | |
"transfer_ori_acc": 22.340425531914892, | |
"transfer_atk_acc": 17.19858156028369 | |
} | |
] |