March07's picture
add transferability information
683d0f6
[
{
"dataset": "cola",
"type": "task",
"origin_acc": 77.8,
"origin_prompt": "Is the provided sentence grammatically correct? Respond with 'Acceptable' or 'Unacceptable'.",
"atk_acc": 75.5,
"atk_prompt": "Is the provided sentence grammatically correct? Respond with 'Acceptable' or 'Unacceptable' and false is not true . ",
"transfer_ori_acc": 6.0,
"transfer_atk_acc": 10.5
},
{
"dataset": "cola",
"type": "role",
"origin_acc": 75.8,
"origin_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.",
"atk_acc": 75.2,
"atk_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar and false is not true . ",
"transfer_ori_acc": 15.299999999999999,
"transfer_atk_acc": 10.0
},
{
"dataset": "mrpc",
"type": "task",
"origin_acc": 83.09,
"origin_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent'.",
"atk_acc": 24.51,
"atk_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ",
"transfer_ori_acc": 65.19607843137256,
"transfer_atk_acc": 69.11764705882352
},
{
"dataset": "mrpc",
"type": "role",
"origin_acc": 80.15,
"origin_prompt": "As a language comparison expert, examine the given pair of sentences and decide if their meanings align, answering with 'equivalent' or 'not_equivalent'.",
"atk_acc": 17.4,
"atk_prompt": "As a language comparison expert, examine the given pair of sentences and decide if their meanings align, answering with 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ",
"transfer_ori_acc": 67.15686274509804,
"transfer_atk_acc": 68.62745098039215
},
{
"dataset": "qnli",
"type": "task",
"origin_acc": 87.8,
"origin_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment'.",
"atk_acc": 79.4,
"atk_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ",
"transfer_ori_acc": 42.5,
"transfer_atk_acc": 6.800000000000001
},
{
"dataset": "qnli",
"type": "role",
"origin_acc": 88.8,
"origin_prompt": "In the capacity of a language evaluator, examine if the given context justifies the answer to the question and indicate your assessment with 'entailment' or 'not_entailment'.",
"atk_acc": 68.9,
"atk_prompt": "In the capacity of a language evaluator, examine if the given context justifies the answer to the question and indicate your assessment with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ",
"transfer_ori_acc": 34.4,
"transfer_atk_acc": 14.099999999999998
},
{
"dataset": "qqp",
"type": "task",
"origin_acc": 88.1,
"origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".",
"atk_acc": 23.4,
"atk_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent and false is not true \". ",
"transfer_ori_acc": 34.8,
"transfer_atk_acc": 54.50000000000001
},
{
"dataset": "qqp",
"type": "role",
"origin_acc": 86.5,
"origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.",
"atk_acc": 86.4,
"atk_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions and true is true . ",
"transfer_ori_acc": 35.3,
"transfer_atk_acc": 41.6
},
{
"dataset": "rte",
"type": "task",
"origin_acc": 84.84,
"origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.",
"atk_acc": 43.68,
"atk_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ",
"transfer_ori_acc": 7.2202166064981945,
"transfer_atk_acc": 0.0
},
{
"dataset": "rte",
"type": "role",
"origin_acc": 84.12,
"origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment'.",
"atk_acc": 36.82,
"atk_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ",
"transfer_ori_acc": 21.299638989169676,
"transfer_atk_acc": 0.0
},
{
"dataset": "sst2",
"type": "task",
"origin_acc": 95.3,
"origin_prompt": "Considering the given phrase, would you say it carries a 'positive' or 'negative' connotation?",
"atk_acc": 94.61,
"atk_prompt": "Considering the given phrase, would you say it carries a 'positive' or 'negative' connotation and false is not true ? ",
"transfer_ori_acc": 1.7201834862385321,
"transfer_atk_acc": 0.11467889908256881
},
{
"dataset": "sst2",
"type": "role",
"origin_acc": 95.41,
"origin_prompt": "Acting as a sentiment evaluator, identify if the given sentence is 'positive' or 'negative'.",
"atk_acc": 95.07,
"atk_prompt": "Acting as a sentiment evaluator, identify if the given sentence is 'positive' or 'negative' and true is true . ",
"transfer_ori_acc": 10.77981651376147,
"transfer_atk_acc": 16.399082568807337
},
{
"dataset": "wnli",
"type": "task",
"origin_acc": 66.2,
"origin_prompt": "Analyze the two provided sentences and decide if their relationship is 'entailment' or 'not_entailment'.",
"atk_acc": 35.21,
"atk_prompt": "Analyze the two provided sentences and decide if their relationship is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ",
"transfer_ori_acc": 14.084507042253522,
"transfer_atk_acc": 1.4084507042253522
},
{
"dataset": "wnli",
"type": "role",
"origin_acc": 66.2,
"origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment'.",
"atk_acc": 23.94,
"atk_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ",
"transfer_ori_acc": 9.859154929577464,
"transfer_atk_acc": 0.0
},
{
"dataset": "mmlu",
"type": "task",
"origin_acc": 45.92,
"origin_prompt": "Answer the subsequent multiple-choice question about {} by picking the right option among 'A', 'B', 'C', or 'D'.",
"atk_acc": 44.86,
"atk_prompt": "Answer the subsequent multiple-choice question about {} by picking the right option among 'A', 'B', 'C', or 'D' and false is not true . ",
"transfer_ori_acc": 7.092198581560284,
"transfer_atk_acc": 12.76595744680851
},
{
"dataset": "mmlu",
"type": "role",
"origin_acc": 46.63,
"origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.",
"atk_acc": 44.86,
"atk_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D' and false is not true .",
"transfer_ori_acc": 8.51063829787234,
"transfer_atk_acc": 15.602836879432624
}
]