Spaces:
Runtime error
Runtime error
[ | |
{ | |
"dataset": "cola", | |
"type": "task", | |
"origin_acc": 79.0, | |
"origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable':", | |
"atk_acc": 2.0, | |
"atk_prompt": "Revising the punishments below and locate whether its dictionary is 'Acceptable' or 'Unacceptable':", | |
"transfer_ori_acc": 76.5, | |
"transfer_atk_acc": 45.7 | |
}, | |
{ | |
"dataset": "cola", | |
"type": "role", | |
"origin_acc": 80.0, | |
"origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", | |
"atk_acc": 6.0, | |
"atk_prompt": "While performing jargon discusses, categories the language of the below sentence as 'acceptable' for mend jargon or 'unacceptable' for unfit grammar:", | |
"transfer_ori_acc": 75.5, | |
"transfer_atk_acc": 76.2 | |
}, | |
{ | |
"dataset": "mrpc", | |
"type": "task", | |
"origin_acc": 75.0, | |
"origin_prompt": "Examine the pair of sentences and decide if their meanings are identical by answering with 'equivalent' or 'not_equivalent'.", | |
"atk_acc": 68.5, | |
"atk_prompt": "Examine the pair of sentences and selects if their regard are identical by responding with 'equivalent' or 'not_equivalent'. ", | |
"transfer_ori_acc": 82.1078431372549, | |
"transfer_atk_acc": 82.1078431372549 | |
}, | |
{ | |
"dataset": "mrpc", | |
"type": "role", | |
"origin_acc": 75.5, | |
"origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.", | |
"atk_acc": 0.0, | |
"atk_prompt": "About your expertise as a semantic appraisal means, evaluate the provided penalty and classify their meanings as 'equivalent' or 'not_equivalent'. ", | |
"transfer_ori_acc": 76.47058823529412, | |
"transfer_atk_acc": 76.47058823529412 | |
}, | |
{ | |
"dataset": "qnli", | |
"type": "task", | |
"origin_acc": 77.5, | |
"origin_prompt": "Consider the context and question, and indicate if the answer can be logically deduced from the context by responding with 'entailment' or 'not_entailment'.", | |
"atk_acc": 0.5, | |
"atk_prompt": "Recital the context and items, and specify if the solution can be logically subtracted from the framework by reactions with 'entailment' or 'not_entailment'. ", | |
"transfer_ori_acc": 83.39999999999999, | |
"transfer_atk_acc": 76.5 | |
}, | |
{ | |
"dataset": "qnli", | |
"type": "role", | |
"origin_acc": 78.0, | |
"origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", | |
"atk_acc": 0.0, | |
"atk_prompt": "As a semantic fellows, measuring whether the rendered background supports the answers to the item and chosen 'entailment' or 'not_entailment'. ", | |
"transfer_ori_acc": 89.1, | |
"transfer_atk_acc": 78.10000000000001 | |
}, | |
{ | |
"dataset": "qqp", | |
"type": "task", | |
"origin_acc": 80.5, | |
"origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", | |
"atk_acc": 0.0, | |
"atk_prompt": "Kan these two statement is studied same in sens? Reaction with 'equivalent' or 'not_equivalent'. ", | |
"transfer_ori_acc": 87.8, | |
"transfer_atk_acc": 59.3 | |
}, | |
{ | |
"dataset": "qqp", | |
"type": "role", | |
"origin_acc": 80.5, | |
"origin_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.", | |
"atk_acc": 76.5, | |
"atk_prompt": "Functioning as a question similarity evaluation tool, analyse the given questions and decide if they share the same feel, responding with 'equivalent' or 'not_equivalent'. ", | |
"transfer_ori_acc": 87.3, | |
"transfer_atk_acc": 87.0 | |
}, | |
{ | |
"dataset": "rte", | |
"type": "task", | |
"origin_acc": 83.5, | |
"origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", | |
"atk_acc": 60.5, | |
"atk_prompt": "Pose the aftermath two phrases entailment or not_entailment? Reactions me with \"entailment\" or \"not_entailment\", just one adjectives. ", | |
"transfer_ori_acc": 88.08664259927798, | |
"transfer_atk_acc": 82.31046931407943 | |
}, | |
{ | |
"dataset": "rte", | |
"type": "role", | |
"origin_acc": 81.0, | |
"origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':", | |
"atk_acc": 60.0, | |
"atk_prompt": "Because an devices for entailment appraised, revisit the two phrases and identifying if their correlate is 'entailment' or 'not_entailment'. Answered with 'entailment' or 'not_entailment':", | |
"transfer_ori_acc": 85.5595667870036, | |
"transfer_atk_acc": 85.92057761732852 | |
}, | |
{ | |
"dataset": "sst2", | |
"type": "task", | |
"origin_acc": 90.5, | |
"origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment:", | |
"atk_acc": 0.0, | |
"atk_prompt": "Analysis this assertion and defining whether it ai a 'positive' or 'negative' sentiment: ", | |
"transfer_ori_acc": 95.06880733944955, | |
"transfer_atk_acc": 95.29816513761467 | |
}, | |
{ | |
"dataset": "sst2", | |
"type": "role", | |
"origin_acc": 94.0, | |
"origin_prompt": "Emulating a sentiment classification system, indicate whether the provided text is 'positive' or 'negative'. Classify:", | |
"atk_acc": 4.5, | |
"atk_prompt": "Emulating a foreboding classifies regime, explain whether the stipulated laws is 'positive' or 'negative'. Grading: ", | |
"transfer_ori_acc": 94.72477064220183, | |
"transfer_atk_acc": 91.5137614678899 | |
}, | |
{ | |
"dataset": "wnli", | |
"type": "task", | |
"origin_acc": 69.01, | |
"origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", | |
"atk_acc": 52.11, | |
"atk_prompt": "Identify whether the given pair of sentence exposes entailment or not_entailment. Retaliated with 'entailment' or 'not_entailment'.", | |
"transfer_ori_acc": 59.154929577464785, | |
"transfer_atk_acc": 60.56338028169014 | |
}, | |
{ | |
"dataset": "wnli", | |
"type": "role", | |
"origin_acc": 67.61, | |
"origin_prompt": "Acting as an entailment detection instrument, determine if the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment':", | |
"atk_acc": 46.48, | |
"atk_prompt": "Acting as an entailment detection instrument, determine if the given torque of sentences depicting entailment or not_entailment. Reactions with 'entailment' or 'not_entailment':", | |
"transfer_ori_acc": 59.154929577464785, | |
"transfer_atk_acc": 59.154929577464785 | |
}, | |
{ | |
"dataset": "mmlu", | |
"type": "task", | |
"origin_acc": 64.0, | |
"origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.", | |
"atk_acc": 50.5, | |
"atk_prompt": "In relation to the multiple-choice issue on {}, please delivering the clarification rebuttal by selector 'A', 'B', 'C', or 'D'. ", | |
"transfer_ori_acc": 44.858156028368796, | |
"transfer_atk_acc": 42.02127659574468 | |
}, | |
{ | |
"dataset": "mmlu", | |
"type": "role", | |
"origin_acc": 61.5, | |
"origin_prompt": "As a skilled person in the field of {}, please respond to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", | |
"atk_acc": 54.5, | |
"atk_prompt": "As a expert person in the minefields of {}, please reacts to the multiple-choice question by choice 'A', 'B', 'C', or 'D'.", | |
"transfer_ori_acc": 44.680851063829785, | |
"transfer_atk_acc": 44.50354609929078 | |
}, | |
{ | |
"dataset": "squad_v2", | |
"type": "task", | |
"origin_acc": 70.01, | |
"origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, respond with 'unanswerable'.", | |
"atk_acc": 5.68, | |
"atk_prompt": "Based on the received backgrounds, provide the nicest unlikely respond. Although there's no riposte available in the context, reactions with 'unanswerable'.", | |
"transfer_ori_acc": 87.04992770387507, | |
"transfer_atk_acc": 81.35502139314727 | |
}, | |
{ | |
"dataset": "squad_v2", | |
"type": "role", | |
"origin_acc": 69.17, | |
"origin_prompt": "Using your knowledge of the context, identify the best answer to the question. If the context doesn't provide an answer, write 'unanswerable'.", | |
"atk_acc": 12.54, | |
"atk_prompt": "Using your skill of the background, specify the improved rebuttal to the question. While the context doesn't delivering an reactions, writings 'unanswerable'.", | |
"transfer_ori_acc": 87.54992770387507, | |
"transfer_atk_acc": 74.81077674890263 | |
}, | |
{ | |
"dataset": "iwslt", | |
"type": "task", | |
"origin_acc": 0.22, | |
"origin_prompt": "Using machine translation, convert the given sentence from {} into the {} language:", | |
"atk_acc": 0.0, | |
"atk_prompt": "Using typewriter translation, transformation the given sentence from {} into the {} pronunciation:", | |
"transfer_ori_acc": 0.13366423747896072, | |
"transfer_atk_acc": 0.13708349489640922 | |
}, | |
{ | |
"dataset": "iwslt", | |
"type": "role", | |
"origin_acc": 0.22, | |
"origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}:", | |
"atk_acc": 0.0, | |
"atk_prompt": "Select on the features of a certifiable translators, conducting a translation of the gave sentencing from {} to {}: ", | |
"transfer_ori_acc": 0.14266566803921382, | |
"transfer_atk_acc": 0.1319452584104338 | |
}, | |
{ | |
"dataset": "un_multi", | |
"type": "task", | |
"origin_acc": 0.34, | |
"origin_prompt": "Translate the subsequent text passage from its original {} language to the {} language:", | |
"atk_acc": 0.07, | |
"atk_prompt": "Translate the sequential version passage from its primitive {} grammar to the {} pronunciation:", | |
"transfer_ori_acc": 0.2679607664026083, | |
"transfer_atk_acc": 0.26331875982607134 | |
}, | |
{ | |
"dataset": "un_multi", | |
"type": "role", | |
"origin_acc": 0.34, | |
"origin_prompt": "While acting as an experienced language expert, translate the following text from {} to {}:", | |
"atk_acc": 0.27, | |
"atk_prompt": "While acting as an suffered dialect expert, translate the below laws from {} to {}: ", | |
"transfer_ori_acc": 0.26489202618745367, | |
"transfer_atk_acc": 0.2534345391549418 | |
}, | |
{ | |
"dataset": "math", | |
"type": "task", | |
"origin_acc": 6.88, | |
"origin_prompt": "Obtain the result for this math question regarding {}:", | |
"atk_acc": 0.0, | |
"atk_prompt": "Obtain the achievements for this math question respecting {}:", | |
"transfer_ori_acc": 14.374999999999998, | |
"transfer_atk_acc": 14.374999999999998 | |
}, | |
{ | |
"dataset": "math", | |
"type": "role", | |
"origin_acc": 5.0, | |
"origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}:", | |
"atk_acc": 0.0, | |
"atk_prompt": "As a mathematics instructor, calculate the rebuttal to the following problem related to {}: ", | |
"transfer_ori_acc": 11.875, | |
"transfer_atk_acc": 11.25 | |
} | |
] |