Spaces:
Runtime error
Runtime error
[ | |
{ | |
"dataset": "cola", | |
"type": "task", | |
"origin_acc": 69.0, | |
"origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':", | |
"atk_acc": 0.0, | |
"atk_prompt": "Evaluating the later sen𝚝ence and determine if it is grammatically correct. Respoոd with 'Acceptable' or 'Unacceptable':", | |
"transfer_ori_acc": 78.0, | |
"transfer_atk_acc": 76.1 | |
}, | |
{ | |
"dataset": "cola", | |
"type": "role", | |
"origin_acc": 54.9, | |
"origin_prompt": "As a tool for determining grammatical correctness, review the sentence and categorize its grammar as either 'acceptable' or 'unacceptable':", | |
"atk_acc": 0.0, | |
"atk_prompt": "Aѕ a tool for determining grammatical regularity, reviewing the penalties and categorizе its grammar as ei𝚝her 'acceptable' or 'unacceptable':", | |
"transfer_ori_acc": 74.9, | |
"transfer_atk_acc": 55.50000000000001 | |
}, | |
{ | |
"dataset": "mrpc", | |
"type": "task", | |
"origin_acc": 58.09, | |
"origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.", | |
"atk_acc": 0.49, | |
"atk_prompt": "Comрare the later sentences and determine if they share the same semantic meaning by respoոding with 'equivalent' or 'not_equivalent'. ", | |
"transfer_ori_acc": 82.1078431372549, | |
"transfer_atk_acc": 81.61764705882352 | |
}, | |
{ | |
"dataset": "mrpc", | |
"type": "role", | |
"origin_acc": 53.43, | |
"origin_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'.", | |
"atk_acc": 0.0, | |
"atk_prompt": "Iո the rloe of a sentence comparison analyst, asess the provided sentences and indicate if they convey the same meaning by selec𝚝ing 'equivalent' or 'not_equivalent'. ", | |
"transfer_ori_acc": 81.12745098039215, | |
"transfer_atk_acc": 83.08823529411765 | |
}, | |
{ | |
"dataset": "qnli", | |
"type": "task", | |
"origin_acc": 30.5, | |
"origin_prompt": "Review the given context and question, and decide if the context contains enough information to support the answer by selecting 'entailment' or 'not_entailment'.", | |
"atk_acc": 0.0, | |
"atk_prompt": "Revisited the afforded contеxt and question, and decide if the context contains enough information to support the answer by selec𝚝ing 'entailment' or 'not_entailment'. ", | |
"transfer_ori_acc": 85.2, | |
"transfer_atk_acc": 85.39999999999999 | |
}, | |
{ | |
"dataset": "qnli", | |
"type": "role", | |
"origin_acc": 38.4, | |
"origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.", | |
"atk_acc": 0.1, | |
"atk_prompt": "Aѕ a textual deduced expeⲅt, analyzes if the replying to the question can be deduced from the provided context and selec𝚝 'entailment' or 'not_entailment'. ", | |
"transfer_ori_acc": 78.10000000000001, | |
"transfer_atk_acc": 83.89999999999999 | |
}, | |
{ | |
"dataset": "qqp", | |
"type": "task", | |
"origin_acc": 23.9, | |
"origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", | |
"atk_acc": 0.2, | |
"atk_prompt": "Possible these two statemen𝚝s be considered equal in meaing? Answer with 'equivalent' or 'not_equivalent'. ", | |
"transfer_ori_acc": 87.8, | |
"transfer_atk_acc": 87.8 | |
}, | |
{ | |
"dataset": "qqp", | |
"type": "role", | |
"origin_acc": 16.9, | |
"origin_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions.", | |
"atk_acc": 0.0, | |
"atk_prompt": "Iո the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different ques𝚝ions. ", | |
"transfer_ori_acc": 87.1, | |
"transfer_atk_acc": 87.2 | |
}, | |
{ | |
"dataset": "rte", | |
"type": "task", | |
"origin_acc": 45.13, | |
"origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", | |
"atk_acc": 1.08, | |
"atk_prompt": "Ai the relationship between the given sentence represent entailment or not_entailment? Answer with 'entailment' or 'not_entailment'.", | |
"transfer_ori_acc": 85.1985559566787, | |
"transfer_atk_acc": 84.47653429602889 | |
}, | |
{ | |
"dataset": "rte", | |
"type": "role", | |
"origin_acc": 51.26, | |
"origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':", | |
"atk_acc": 0.36, | |
"atk_prompt": "Onto the role of a semantic relationship offing, e×amine the connection between the given seոtences and decіde if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':", | |
"transfer_ori_acc": 82.31046931407943, | |
"transfer_atk_acc": 83.03249097472924 | |
}, | |
{ | |
"dataset": "sst2", | |
"type": "task", | |
"origin_acc": 48.05, | |
"origin_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative':", | |
"atk_acc": 2.06, | |
"atk_prompt": "After examining the following expression, label its emo𝚝ion as neither 'positive' or 'negative': ", | |
"transfer_ori_acc": 93.23394495412845, | |
"transfer_atk_acc": 93.46330275229357 | |
}, | |
{ | |
"dataset": "sst2", | |
"type": "role", | |
"origin_acc": 37.5, | |
"origin_prompt": "As an emotion detector, determine if the provided passage conveys a 'positive' or 'negative' sentiment. Classify:", | |
"atk_acc": 1.03, | |
"atk_prompt": "Because an emotion detector, determine if the povided passgae convey a 'positive' or 'negative' ambience. Classi𝚏y: ", | |
"transfer_ori_acc": 94.83944954128441, | |
"transfer_atk_acc": 91.85779816513761 | |
}, | |
{ | |
"dataset": "wnli", | |
"type": "task", | |
"origin_acc": 43.66, | |
"origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", | |
"atk_acc": 1.41, | |
"atk_prompt": "Deos the relationship between the given sentences represent entailment or not_entailment? Answering with 'entailment' or 'not_entailment'.", | |
"transfer_ori_acc": 61.97183098591549, | |
"transfer_atk_acc": 59.154929577464785 | |
}, | |
{ | |
"dataset": "wnli", | |
"type": "role", | |
"origin_acc": 43.66, | |
"origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment':", | |
"atk_acc": 0.0, | |
"atk_prompt": "During your roⅼe as an entailment analysis tool, evaluating the relationship between the given sentences and classifying it as 'entailment' or 'not_entailment':", | |
"transfer_ori_acc": 56.33802816901409, | |
"transfer_atk_acc": 57.74647887323944 | |
}, | |
{ | |
"dataset": "mmlu", | |
"type": "task", | |
"origin_acc": 25.67, | |
"origin_prompt": "Evaluate the multiple-choice question about {} and select the most fitting response from 'A', 'B', 'C', or 'D'.", | |
"atk_acc": 9.92, | |
"atk_prompt": "Assess the mul𝚝iple-choice question about {} and seⅼect the most fitting response from 'A', 'B', 'C', or 'D'. ", | |
"transfer_ori_acc": 44.50354609929078, | |
"transfer_atk_acc": 44.50354609929078 | |
}, | |
{ | |
"dataset": "mmlu", | |
"type": "role", | |
"origin_acc": 24.39, | |
"origin_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", | |
"atk_acc": 9.57, | |
"atk_prompt": "With your knowledge of {}, tackle the 𝚏ollowing multiple-choіce problem by choosing 'A', 'B', 'C', or 'D'.", | |
"transfer_ori_acc": 43.97163120567376, | |
"transfer_atk_acc": 45.744680851063826 | |
} | |
] |