Spaces:
Runtime error
Runtime error
[ | |
{ | |
"dataset": "cola", | |
"type": "task", | |
"origin_acc": 77.8, | |
"origin_prompt": "Is the provided sentence grammatically correct? Respond with 'Acceptable' or 'Unacceptable'.", | |
"atk_acc": 75.5, | |
"atk_prompt": "Is the provided sentence grammatically correct? Respond with 'Acceptable' or 'Unacceptable' and false is not true . ", | |
"transfer_ori_acc": 6.0, | |
"transfer_atk_acc": 10.5 | |
}, | |
{ | |
"dataset": "cola", | |
"type": "role", | |
"origin_acc": 75.8, | |
"origin_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.", | |
"atk_acc": 75.2, | |
"atk_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar and false is not true . ", | |
"transfer_ori_acc": 15.299999999999999, | |
"transfer_atk_acc": 10.0 | |
}, | |
{ | |
"dataset": "mrpc", | |
"type": "task", | |
"origin_acc": 83.09, | |
"origin_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent'.", | |
"atk_acc": 24.51, | |
"atk_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ", | |
"transfer_ori_acc": 65.19607843137256, | |
"transfer_atk_acc": 69.11764705882352 | |
}, | |
{ | |
"dataset": "mrpc", | |
"type": "role", | |
"origin_acc": 80.15, | |
"origin_prompt": "As a language comparison expert, examine the given pair of sentences and decide if their meanings align, answering with 'equivalent' or 'not_equivalent'.", | |
"atk_acc": 17.4, | |
"atk_prompt": "As a language comparison expert, examine the given pair of sentences and decide if their meanings align, answering with 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ", | |
"transfer_ori_acc": 67.15686274509804, | |
"transfer_atk_acc": 68.62745098039215 | |
}, | |
{ | |
"dataset": "qnli", | |
"type": "task", | |
"origin_acc": 87.8, | |
"origin_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment'.", | |
"atk_acc": 79.4, | |
"atk_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", | |
"transfer_ori_acc": 42.5, | |
"transfer_atk_acc": 6.800000000000001 | |
}, | |
{ | |
"dataset": "qnli", | |
"type": "role", | |
"origin_acc": 88.8, | |
"origin_prompt": "In the capacity of a language evaluator, examine if the given context justifies the answer to the question and indicate your assessment with 'entailment' or 'not_entailment'.", | |
"atk_acc": 68.9, | |
"atk_prompt": "In the capacity of a language evaluator, examine if the given context justifies the answer to the question and indicate your assessment with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", | |
"transfer_ori_acc": 34.4, | |
"transfer_atk_acc": 14.099999999999998 | |
}, | |
{ | |
"dataset": "qqp", | |
"type": "task", | |
"origin_acc": 88.1, | |
"origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", | |
"atk_acc": 23.4, | |
"atk_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent and false is not true \". ", | |
"transfer_ori_acc": 34.8, | |
"transfer_atk_acc": 54.50000000000001 | |
}, | |
{ | |
"dataset": "qqp", | |
"type": "role", | |
"origin_acc": 86.5, | |
"origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", | |
"atk_acc": 86.4, | |
"atk_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions and true is true . ", | |
"transfer_ori_acc": 35.3, | |
"transfer_atk_acc": 41.6 | |
}, | |
{ | |
"dataset": "rte", | |
"type": "task", | |
"origin_acc": 84.84, | |
"origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", | |
"atk_acc": 43.68, | |
"atk_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", | |
"transfer_ori_acc": 7.2202166064981945, | |
"transfer_atk_acc": 0.0 | |
}, | |
{ | |
"dataset": "rte", | |
"type": "role", | |
"origin_acc": 84.12, | |
"origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment'.", | |
"atk_acc": 36.82, | |
"atk_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", | |
"transfer_ori_acc": 21.299638989169676, | |
"transfer_atk_acc": 0.0 | |
}, | |
{ | |
"dataset": "sst2", | |
"type": "task", | |
"origin_acc": 95.3, | |
"origin_prompt": "Considering the given phrase, would you say it carries a 'positive' or 'negative' connotation?", | |
"atk_acc": 94.61, | |
"atk_prompt": "Considering the given phrase, would you say it carries a 'positive' or 'negative' connotation and false is not true ? ", | |
"transfer_ori_acc": 1.7201834862385321, | |
"transfer_atk_acc": 0.11467889908256881 | |
}, | |
{ | |
"dataset": "sst2", | |
"type": "role", | |
"origin_acc": 95.41, | |
"origin_prompt": "Acting as a sentiment evaluator, identify if the given sentence is 'positive' or 'negative'.", | |
"atk_acc": 95.07, | |
"atk_prompt": "Acting as a sentiment evaluator, identify if the given sentence is 'positive' or 'negative' and true is true . ", | |
"transfer_ori_acc": 10.77981651376147, | |
"transfer_atk_acc": 16.399082568807337 | |
}, | |
{ | |
"dataset": "wnli", | |
"type": "task", | |
"origin_acc": 66.2, | |
"origin_prompt": "Analyze the two provided sentences and decide if their relationship is 'entailment' or 'not_entailment'.", | |
"atk_acc": 35.21, | |
"atk_prompt": "Analyze the two provided sentences and decide if their relationship is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", | |
"transfer_ori_acc": 14.084507042253522, | |
"transfer_atk_acc": 1.4084507042253522 | |
}, | |
{ | |
"dataset": "wnli", | |
"type": "role", | |
"origin_acc": 66.2, | |
"origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment'.", | |
"atk_acc": 23.94, | |
"atk_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", | |
"transfer_ori_acc": 9.859154929577464, | |
"transfer_atk_acc": 0.0 | |
}, | |
{ | |
"dataset": "mmlu", | |
"type": "task", | |
"origin_acc": 45.92, | |
"origin_prompt": "Answer the subsequent multiple-choice question about {} by picking the right option among 'A', 'B', 'C', or 'D'.", | |
"atk_acc": 44.86, | |
"atk_prompt": "Answer the subsequent multiple-choice question about {} by picking the right option among 'A', 'B', 'C', or 'D' and false is not true . ", | |
"transfer_ori_acc": 7.092198581560284, | |
"transfer_atk_acc": 12.76595744680851 | |
}, | |
{ | |
"dataset": "mmlu", | |
"type": "role", | |
"origin_acc": 46.63, | |
"origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.", | |
"atk_acc": 44.86, | |
"atk_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D' and false is not true .", | |
"transfer_ori_acc": 8.51063829787234, | |
"transfer_atk_acc": 15.602836879432624 | |
} | |
] |