March07's picture
add transferability information
683d0f6
[
{
"dataset": "cola",
"type": "task",
"origin_acc": 78.0,
"origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':",
"atk_acc": 63.5,
"atk_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable' and false is not true :",
"transfer_ori_acc": 69.0,
"transfer_atk_acc": 34.8
},
{
"dataset": "cola",
"type": "role",
"origin_acc": 76.3,
"origin_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:",
"atk_acc": 62.9,
"atk_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar and true is true and true is true and true is true and true is true and true is true :",
"transfer_ori_acc": 36.199999999999996,
"transfer_atk_acc": 54.800000000000004
},
{
"dataset": "mrpc",
"type": "task",
"origin_acc": 82.11,
"origin_prompt": "Do the meanings of these two statements align? Indicate your answer with 'equivalent' or 'not_equivalent'.",
"atk_acc": 29.66,
"atk_prompt": "Do the meanings of these two statements align? Indicate your answer with 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ",
"transfer_ori_acc": 45.34313725490196,
"transfer_atk_acc": 61.274509803921575
},
{
"dataset": "mrpc",
"type": "role",
"origin_acc": 81.62,
"origin_prompt": "As a language comparison expert, examine the given pair of sentences and decide if their meanings align, answering with 'equivalent' or 'not_equivalent'.",
"atk_acc": 15.93,
"atk_prompt": "As a language comparison expert, examine the given pair of sentences and decide if their meanings align, answering with 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ",
"transfer_ori_acc": 63.23529411764706,
"transfer_atk_acc": 26.225490196078432
},
{
"dataset": "qnli",
"type": "task",
"origin_acc": 88.7,
"origin_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment'.",
"atk_acc": 78.6,
"atk_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ",
"transfer_ori_acc": 34.1,
"transfer_atk_acc": 9.8
},
{
"dataset": "qnli",
"type": "role",
"origin_acc": 89.2,
"origin_prompt": "As a textual analyst, examine if the given context logically implies the answer to the question and indicate your decision with 'entailment' or 'not_entailment'.",
"atk_acc": 57.2,
"atk_prompt": "As a textual analyst, examine if the given context logically implies the answer to the question and indicate your decision with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ",
"transfer_ori_acc": 13.8,
"transfer_atk_acc": 10.6
},
{
"dataset": "qqp",
"type": "task",
"origin_acc": 88.1,
"origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".",
"atk_acc": 24.0,
"atk_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent and false is not true \". ",
"transfer_ori_acc": 23.9,
"transfer_atk_acc": 30.599999999999998
},
{
"dataset": "qqp",
"type": "role",
"origin_acc": 86.9,
"origin_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions.",
"atk_acc": 86.4,
"atk_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions and true is true and true is true and true is true and true is true and true is true . ",
"transfer_ori_acc": 23.3,
"transfer_atk_acc": 22.2
},
{
"dataset": "rte",
"type": "task",
"origin_acc": 86.64,
"origin_prompt": "Analyze the two provided sentences and decide if their relationship is 'entailment' or 'not_entailment'.",
"atk_acc": 43.32,
"atk_prompt": "Analyze the two provided sentences and decide if their relationship is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true .",
"transfer_ori_acc": 18.4115523465704,
"transfer_atk_acc": 16.967509025270758
},
{
"dataset": "rte",
"type": "role",
"origin_acc": 84.84,
"origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment':",
"atk_acc": 38.27,
"atk_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true :",
"transfer_ori_acc": 43.682310469314075,
"transfer_atk_acc": 35.37906137184115
},
{
"dataset": "sst2",
"type": "task",
"origin_acc": 94.84,
"origin_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative':",
"atk_acc": 92.43,
"atk_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative' and true is true and true is true and true is true and true is true and true is true : ",
"transfer_ori_acc": 47.935779816513765,
"transfer_atk_acc": 43.23394495412844
},
{
"dataset": "sst2",
"type": "role",
"origin_acc": 95.3,
"origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:",
"atk_acc": 93.81,
"atk_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify and false is not true : ",
"transfer_ori_acc": 38.646788990825684,
"transfer_atk_acc": 36.353211009174316
},
{
"dataset": "wnli",
"type": "task",
"origin_acc": 66.2,
"origin_prompt": "Analyze the two provided sentences and decide if their relationship is 'entailment' or 'not_entailment'.",
"atk_acc": 26.76,
"atk_prompt": "Analyze the two provided sentences and decide if their relationship is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true .",
"transfer_ori_acc": 7.042253521126761,
"transfer_atk_acc": 29.577464788732392
},
{
"dataset": "wnli",
"type": "role",
"origin_acc": 64.79,
"origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment':",
"atk_acc": 25.35,
"atk_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true :",
"transfer_ori_acc": 43.66197183098591,
"transfer_atk_acc": 33.80281690140845
},
{
"dataset": "mmlu",
"type": "task",
"origin_acc": 45.74,
"origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.",
"atk_acc": 44.33,
"atk_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D' and false is not true . ",
"transfer_ori_acc": 22.69503546099291,
"transfer_atk_acc": 25.177304964539005
},
{
"dataset": "mmlu",
"type": "role",
"origin_acc": 44.86,
"origin_prompt": "Utilizing your expertise in {}, answer the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.",
"atk_acc": 43.97,
"atk_prompt": "Utilizing your expertise in {}, answer the following multiple-choice question by picking 'A', 'B', 'C', or 'D' and false is not true .",
"transfer_ori_acc": 20.74468085106383,
"transfer_atk_acc": 20.74468085106383
}
]