Spaces:
Runtime error
Runtime error
[ | |
{ | |
"dataset": "cola", | |
"type": "task", | |
"origin_acc": 69.2, | |
"origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':", | |
"atk_acc": 3.6, | |
"atk_prompt": "sAsess the ofllowing lsentence and determine if it is grammatiaclly correct. Rspond with 'Acceptable' or 'Unacceptable':", | |
"transfer_ori_acc": 86.2, | |
"transfer_atk_acc": 86.3 | |
}, | |
{ | |
"dataset": "cola", | |
"type": "role", | |
"origin_acc": 50.6, | |
"origin_prompt": "In the role of a syntax analyzer, examine the grammar of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones:", | |
"atk_acc": 4.8, | |
"atk_prompt": "n the role of a syntax analyzer, examine the grammar of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorret oKnes:", | |
"transfer_ori_acc": 85.6, | |
"transfer_atk_acc": 85.8 | |
}, | |
{ | |
"dataset": "mrpc", | |
"type": "task", | |
"origin_acc": 57.84, | |
"origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.", | |
"atk_acc": 0.0, | |
"atk_prompt": "oCmpare the follAwing sentewces and determine if they share the same swmantic meaning by responding with 'equivalent' or 'not_equivalent'. ", | |
"transfer_ori_acc": 86.02941176470588, | |
"transfer_atk_acc": 86.02941176470588 | |
}, | |
{ | |
"dataset": "mrpc", | |
"type": "role", | |
"origin_acc": 52.21, | |
"origin_prompt": "In your capacity as a semantic comparison tool, examine the given sentences and decide if their meanings are identical, responding with 'equivalent' or 'not_equivalent'.", | |
"atk_acc": 1.47, | |
"atk_prompt": "IYn your capaciMy as a semantic comparison ool, examine the given sentences and deXide if their meanings are identRcal, rseponding with 'equivalent' or 'not_equivalent'. ", | |
"transfer_ori_acc": 87.00980392156863, | |
"transfer_atk_acc": 86.51960784313727 | |
}, | |
{ | |
"dataset": "qnli", | |
"type": "task", | |
"origin_acc": 30.5, | |
"origin_prompt": "Review the given context and question, and decide if the context contains enough information to support the answer by selecting 'entailment' or 'not_entailment'.", | |
"atk_acc": 4.7, | |
"atk_prompt": "Rview the gven cnotext and question, and edcide if the context containZs enough information to Vsupport the ainswer by selecting 'entailment' or 'not_entailment'. ", | |
"transfer_ori_acc": 94.1, | |
"transfer_atk_acc": 94.19999999999999 | |
}, | |
{ | |
"dataset": "qnli", | |
"type": "role", | |
"origin_acc": 35.3, | |
"origin_prompt": "As a language expert, assess if the given context entails the answer to the question and respond with 'entailment' or 'not_entailment'.", | |
"atk_acc": 2.1, | |
"atk_prompt": "As a language epxert, arssess if the gien contet entaxls the ansGer to the questAon and resNpond with 'entailment' or 'not_entailment'. ", | |
"transfer_ori_acc": 94.19999999999999, | |
"transfer_atk_acc": 94.5 | |
}, | |
{ | |
"dataset": "qqp", | |
"type": "task", | |
"origin_acc": 24.1, | |
"origin_prompt": "Analyze if the given set of sentences have the same connotation by answering with 'equivalent' or 'not_equivalent'.", | |
"atk_acc": 2.0, | |
"atk_prompt": "tAnalyze if the giCen est of sentUnces have the same connotatioq by answering with 'equivalent' or 'not_equivalent'. ", | |
"transfer_ori_acc": 87.6, | |
"transfer_atk_acc": 88.6 | |
}, | |
{ | |
"dataset": "qqp", | |
"type": "role", | |
"origin_acc": 16.9, | |
"origin_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions.", | |
"atk_acc": 0.7, | |
"atk_prompt": "IZn the role of a questoion similarity naalyzer, examin the meJaning of the provided questions and Uecide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for dSfferent questioXns. ", | |
"transfer_ori_acc": 88.1, | |
"transfer_atk_acc": 88.5 | |
}, | |
{ | |
"dataset": "rte", | |
"type": "task", | |
"origin_acc": 41.16, | |
"origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", | |
"atk_acc": 1.08, | |
"atk_prompt": "DetermiCne if the givePn pir of sentcnces disWlays entailment or not_entailment. Rtspond with 'entailment' or 'not_entailment'.", | |
"transfer_ori_acc": 93.50180505415162, | |
"transfer_atk_acc": 93.50180505415162 | |
}, | |
{ | |
"dataset": "rte", | |
"type": "role", | |
"origin_acc": 49.82, | |
"origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':", | |
"atk_acc": 1.08, | |
"atk_prompt": "orking as an entailment clasifier, idenStify whethre the givec ptair of sentInces displays entailment or not_entailment. ReKspond with 'entailment' or 'not_entailment':", | |
"transfer_ori_acc": 94.58483754512635, | |
"transfer_atk_acc": 93.14079422382672 | |
}, | |
{ | |
"dataset": "sst2", | |
"type": "task", | |
"origin_acc": 48.05, | |
"origin_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative':", | |
"atk_acc": 0.8, | |
"atk_prompt": "kfter eximining the followinM expression, labe its emolion as egther 'positive' or 'negative': ", | |
"transfer_ori_acc": 91.97247706422019, | |
"transfer_atk_acc": 95.87155963302753 | |
}, | |
{ | |
"dataset": "sst2", | |
"type": "role", | |
"origin_acc": 41.06, | |
"origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:", | |
"atk_acc": 27.18, | |
"atk_prompt": "Functionnig as a sentiment identification tool, assess if the flolowing xepression is 'positive' or 'negative'. ClUssify: ", | |
"transfer_ori_acc": 95.9862385321101, | |
"transfer_atk_acc": 96.3302752293578 | |
}, | |
{ | |
"dataset": "wnli", | |
"type": "task", | |
"origin_acc": 43.66, | |
"origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", | |
"atk_acc": 0.0, | |
"atk_prompt": "VDetermine if the giPen Apair of setences displUays entailment or not_entailment. QRespond with 'entailment' or 'not_entailment'.", | |
"transfer_ori_acc": 78.87323943661971, | |
"transfer_atk_acc": 80.28169014084507 | |
}, | |
{ | |
"dataset": "wnli", | |
"type": "role", | |
"origin_acc": 43.66, | |
"origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment':", | |
"atk_acc": 0.0, | |
"atk_prompt": "kn your rohe as an entailment analysis tool, Zassess the relationship between the gUiven saentences and clasFsify it as 'entailment' or 'not_entailment':", | |
"transfer_ori_acc": 78.87323943661971, | |
"transfer_atk_acc": 77.46478873239437 | |
}, | |
{ | |
"dataset": "mmlu", | |
"type": "task", | |
"origin_acc": 25.67, | |
"origin_prompt": "Evaluate the multiple-choice question about {} and select the most fitting response from 'A', 'B', 'C', or 'D'.", | |
"atk_acc": 14.7, | |
"atk_prompt": "Evaluate the multple-choice question about {} and elect the most fitting respone from 'A', 'B', 'C', or 'D'. ", | |
"transfer_ori_acc": 53.01418439716312, | |
"transfer_atk_acc": 52.4822695035461 | |
}, | |
{ | |
"dataset": "mmlu", | |
"type": "role", | |
"origin_acc": 25.09, | |
"origin_prompt": "As a knowledgeable individual in {}, provide your response to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", | |
"atk_acc": 16.45, | |
"atk_prompt": "As a kAnowledgeable indviidual in {}, provide your ersponse to the multipole-choice quesxion by choosing 'A', 'B', 'C', or 'D'.", | |
"transfer_ori_acc": 53.54609929078015, | |
"transfer_atk_acc": 53.01418439716312 | |
} | |
] |