Spaces:
Runtime error
Runtime error
[ | |
{ | |
"dataset": "cola", | |
"type": "task", | |
"origin_acc": 78.5, | |
"origin_prompt": "Assess the grammatical structure of the given sentence and classify it as 'Acceptable' or 'Unacceptable':", | |
"atk_acc": 0.0, | |
"atk_prompt": "what the grammatical structure of the given sentence and consider it as 'Acceptable' or 'Unacceptable':", | |
"transfer_ori_acc": 85.39999999999999, | |
"transfer_atk_acc": 85.5 | |
}, | |
{ | |
"dataset": "cola", | |
"type": "role", | |
"origin_acc": 80.0, | |
"origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", | |
"atk_acc": 0.0, | |
"atk_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for so grammar or 'unacceptable' for abnormal grammar:", | |
"transfer_ori_acc": 86.0, | |
"transfer_atk_acc": 85.5 | |
}, | |
{ | |
"dataset": "mrpc", | |
"type": "task", | |
"origin_acc": 75.5, | |
"origin_prompt": "Examine the pair of sentences and decide if their meanings are identical by answering with 'equivalent' or 'not_equivalent'.", | |
"atk_acc": 56.0, | |
"atk_prompt": "Examine the pairs of pronouns and evaluate if their meanings are different by answering with 'equivalent' or 'not_equivalent'. ", | |
"transfer_ori_acc": 86.02941176470588, | |
"transfer_atk_acc": 84.06862745098039 | |
}, | |
{ | |
"dataset": "mrpc", | |
"type": "role", | |
"origin_acc": 75.5, | |
"origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.", | |
"atk_acc": 0.0, | |
"atk_prompt": "In your capacity as a semantic assessment tool, evaluate the provided tense and classify their constituents as 'equivalent' or 'not_equivalent'. ", | |
"transfer_ori_acc": 83.33333333333334, | |
"transfer_atk_acc": 83.08823529411765 | |
}, | |
{ | |
"dataset": "qnli", | |
"type": "task", | |
"origin_acc": 79.0, | |
"origin_prompt": "Assess if the answer to the question can be logically concluded from the provided context by choosing 'entailment' or 'not_entailment'.", | |
"atk_acc": 0.0, | |
"atk_prompt": "and if the answer to the question can be logically concluded from the provided context by avoiding 'entailment' or 'not_entailment'. ", | |
"transfer_ori_acc": 94.0, | |
"transfer_atk_acc": 77.3 | |
}, | |
{ | |
"dataset": "qnli", | |
"type": "role", | |
"origin_acc": 79.5, | |
"origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.", | |
"atk_acc": 0.0, | |
"atk_prompt": "As a textual inference expert, analysis if the answer to the question can be deduced from the provided citation and define 'entailment' or 'not_entailment'. ", | |
"transfer_ori_acc": 93.30000000000001, | |
"transfer_atk_acc": 94.3 | |
}, | |
{ | |
"dataset": "qqp", | |
"type": "task", | |
"origin_acc": 79.5, | |
"origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", | |
"atk_acc": 1.0, | |
"atk_prompt": "Can these two constructions be considered equal in meaning? object with 'equivalent' or 'not_equivalent'. ", | |
"transfer_ori_acc": 87.4, | |
"transfer_atk_acc": 87.1 | |
}, | |
{ | |
"dataset": "qqp", | |
"type": "role", | |
"origin_acc": 82.5, | |
"origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", | |
"atk_acc": 0.0, | |
"atk_prompt": "give an table for category comparison evaluation, consider the questions and determine if their concept is the same, responding with 'equivalent' for similarity questions or 'not_equivalent' for different knowledge. ", | |
"transfer_ori_acc": 88.3, | |
"transfer_atk_acc": 89.2 | |
}, | |
{ | |
"dataset": "rte", | |
"type": "task", | |
"origin_acc": 82.5, | |
"origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", | |
"atk_acc": 55.5, | |
"atk_prompt": "determine if the following singular of sentence indicating entailment or not_entailment. contrast with 'entailment' or 'not_entailment'.", | |
"transfer_ori_acc": 93.50180505415162, | |
"transfer_atk_acc": 89.53068592057761 | |
}, | |
{ | |
"dataset": "rte", | |
"type": "role", | |
"origin_acc": 81.5, | |
"origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':", | |
"atk_acc": 37.5, | |
"atk_prompt": "at as an entailment classifier, know which the given type of verb displays entailment or not_entailment. object with 'entailment' or 'not_entailment':", | |
"transfer_ori_acc": 94.58483754512635, | |
"transfer_atk_acc": 90.25270758122743 | |
}, | |
{ | |
"dataset": "sst2", | |
"type": "task", | |
"origin_acc": 90.0, | |
"origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment:", | |
"atk_acc": 0.0, | |
"atk_prompt": "what this assertion and are whether it has a 'positive' or 'negative' sentiment: ", | |
"transfer_ori_acc": 96.55963302752293, | |
"transfer_atk_acc": 96.44495412844036 | |
}, | |
{ | |
"dataset": "sst2", | |
"type": "role", | |
"origin_acc": 93.0, | |
"origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:", | |
"atk_acc": 76.0, | |
"atk_prompt": "from as a sentiments identification tool, assess if the same word is 'positive' or 'negative'. function: ", | |
"transfer_ori_acc": 95.9862385321101, | |
"transfer_atk_acc": 95.52752293577981 | |
}, | |
{ | |
"dataset": "wnli", | |
"type": "task", | |
"origin_acc": 69.01, | |
"origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", | |
"atk_acc": 47.89, | |
"atk_prompt": "Identify whether the given individual of verbs demonstrates entailment or not_entailment. have with 'entailment' or 'not_entailment'.", | |
"transfer_ori_acc": 78.87323943661971, | |
"transfer_atk_acc": 71.83098591549296 | |
}, | |
{ | |
"dataset": "wnli", | |
"type": "role", | |
"origin_acc": 60.56, | |
"origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':", | |
"atk_acc": 47.89, | |
"atk_prompt": "Working as an entailment classifier, identify whether the given corpus of verbs displays entailment or not_entailment. act with 'entailment' or 'not_entailment':", | |
"transfer_ori_acc": 80.28169014084507, | |
"transfer_atk_acc": 69.01408450704226 | |
}, | |
{ | |
"dataset": "mmlu", | |
"type": "task", | |
"origin_acc": 63.5, | |
"origin_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D'.", | |
"atk_acc": 14.5, | |
"atk_prompt": "1 the first multiple-choice question on {}, remove the following answer from the options 'A', 'B', 'C', or 'D'. ", | |
"transfer_ori_acc": 52.304964539007095, | |
"transfer_atk_acc": 15.957446808510639 | |
}, | |
{ | |
"dataset": "mmlu", | |
"type": "role", | |
"origin_acc": 62.0, | |
"origin_prompt": "Drawing upon your mastery of {}, please answer the multiple-choice question by selecting the correct option from 'A', 'B', 'C', or 'D'.", | |
"atk_acc": 1.0, | |
"atk_prompt": "Drawing upon your mastery of {}, simply question the multiple-selecting question by selecting the wrong color from 'A', 'B', 'C', or 'D'.", | |
"transfer_ori_acc": 52.659574468085104, | |
"transfer_atk_acc": 14.893617021276595 | |
}, | |
{ | |
"dataset": "squad_v2", | |
"type": "task", | |
"origin_acc": 72.47, | |
"origin_prompt": "Discover the best answer based on the context. If the context doesn't include an answer, respond with 'unanswerable'.", | |
"atk_acc": 2.76, | |
"atk_prompt": "challenge the best answer based on the context. however the context doesn't include an answer, play with 'unanswerable'.", | |
"transfer_ori_acc": 89.38229949874687, | |
"transfer_atk_acc": 89.88229949874687 | |
}, | |
{ | |
"dataset": "squad_v2", | |
"type": "role", | |
"origin_acc": 65.63, | |
"origin_prompt": "Drawing on your expertise in the context, provide the most precise answer. If the answer is not in the context, respond with 'unanswerable'.", | |
"atk_acc": 1.86, | |
"atk_prompt": "Drawing on your technique in the frame, offer the most precise answer. saying the answer is not in the context, play with 'unanswerable'.", | |
"transfer_ori_acc": 89.48229949874687, | |
"transfer_atk_acc": 89.98229949874687 | |
}, | |
{ | |
"dataset": "iwslt", | |
"type": "task", | |
"origin_acc": 0.22, | |
"origin_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context:", | |
"atk_acc": 0.08, | |
"atk_prompt": "Translate the designated sentence from {} to {} while defending the assumed meaning and context:", | |
"transfer_ori_acc": 0.1697518407093131, | |
"transfer_atk_acc": 0.17006977668295734 | |
}, | |
{ | |
"dataset": "iwslt", | |
"type": "role", | |
"origin_acc": 0.22, | |
"origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}:", | |
"atk_acc": 0.01, | |
"atk_prompt": "who on the role of a certified translated, request a translator of the defined sentence from {} to {}: ", | |
"transfer_ori_acc": 0.1727120569025014, | |
"transfer_atk_acc": 0.16908374849908425 | |
}, | |
{ | |
"dataset": "un_multi", | |
"type": "task", | |
"origin_acc": 0.34, | |
"origin_prompt": "Translate the following text from the source language {} to the target language {}:", | |
"atk_acc": 0.18, | |
"atk_prompt": "Translate the by text from the source language {} to the english example {}:", | |
"transfer_ori_acc": 0.30893486445367974, | |
"transfer_atk_acc": 0.31345451169084315 | |
}, | |
{ | |
"dataset": "un_multi", | |
"type": "role", | |
"origin_acc": 0.34, | |
"origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}:", | |
"atk_acc": 0.08, | |
"atk_prompt": "of the role of a language expertise, define a machine and for the presented sentence, changing it from {} to {}: ", | |
"transfer_ori_acc": 0.31256243540663714, | |
"transfer_atk_acc": 0.3081120174260553 | |
}, | |
{ | |
"dataset": "math", | |
"type": "task", | |
"origin_acc": 6.25, | |
"origin_prompt": "Resolve the following mathematical question associated with {}:", | |
"atk_acc": 0.62, | |
"atk_prompt": "Resolve the following mathematical answer along with {}:", | |
"transfer_ori_acc": 14.374999999999998, | |
"transfer_atk_acc": 13.750000000000002 | |
}, | |
{ | |
"dataset": "math", | |
"type": "role", | |
"origin_acc": 5.0, | |
"origin_prompt": "In the position of a mathematics coach, figure out the answer to this math challenge involving {}:", | |
"atk_acc": 0.0, | |
"atk_prompt": "In the position of a mathematics coach, develop out the theme to this math challenge involving {}: ", | |
"transfer_ori_acc": 10.625, | |
"transfer_atk_acc": 3.75 | |
} | |
] |