March07's picture
add transferability information
683d0f6
[
{
"dataset": "cola",
"type": "task",
"origin_acc": 62.2,
"origin_prompt": "Assess the grammatical structure of the given sentence and classify it as 'Acceptable' or 'Unacceptable'.",
"atk_acc": 17.4,
"atk_prompt": "Assess the grammatical structure of the given sentence and classify it as 'Acceptable' or 'Unacceptable' Q47Jmd4lMV. ",
"transfer_ori_acc": 86.2,
"transfer_atk_acc": 86.3
},
{
"dataset": "cola",
"type": "role",
"origin_acc": 35.2,
"origin_prompt": "Working as a grammar classifier, identify whether the given sentence has correct grammar, and respond with 'acceptable' for correct sentences or 'unacceptable' for incorrect ones.",
"atk_acc": 2.4,
"atk_prompt": "Working as a grammar classifier, identify whether the given sentence has correct grammar, and respond with 'acceptable' for correct sentences or 'unacceptable' for incorrect ones yh912BU8T0. ",
"transfer_ori_acc": 85.9,
"transfer_atk_acc": 86.3
},
{
"dataset": "mrpc",
"type": "task",
"origin_acc": 69.36,
"origin_prompt": "Determine if the meanings of the following sentences are semantically equivalent by responding with 'equivalent' or 'not_equivalent'.",
"atk_acc": 62.01,
"atk_prompt": "Determine if the meanings of the following sentences are semantically equivalent by responding with 'equivalent' or 'not_equivalent' SFPCqpiClT. ",
"transfer_ori_acc": 84.55882352941177,
"transfer_atk_acc": 84.31372549019608
},
{
"dataset": "mrpc",
"type": "role",
"origin_acc": 68.14,
"origin_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'.",
"atk_acc": 58.09,
"atk_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent' xikCjkMydH. ",
"transfer_ori_acc": 86.27450980392157,
"transfer_atk_acc": 86.27450980392157
},
{
"dataset": "qnli",
"type": "task",
"origin_acc": 41.2,
"origin_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment'.",
"atk_acc": 1.1,
"atk_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment' zq0DcZ5dnI. ",
"transfer_ori_acc": 93.8,
"transfer_atk_acc": 93.7
},
{
"dataset": "qnli",
"type": "role",
"origin_acc": 41.4,
"origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.",
"atk_acc": 1.0,
"atk_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment' d6ZQ3u0GBQ. ",
"transfer_ori_acc": 94.0,
"transfer_atk_acc": 93.89999999999999
},
{
"dataset": "qqp",
"type": "task",
"origin_acc": 38.2,
"origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.",
"atk_acc": 23.5,
"atk_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent' 6a4Yn3RGVc. ",
"transfer_ori_acc": 88.0,
"transfer_atk_acc": 87.8
},
{
"dataset": "qqp",
"type": "role",
"origin_acc": 32.9,
"origin_prompt": "Working as a question classifier, identify whether the given questions share the same meaning, and respond with 'equivalent' for equivalent questions or 'not_equivalent' for different ones.",
"atk_acc": 25.0,
"atk_prompt": "Working as a question classifier, identify whether the given questions share the same meaning, and respond with 'equivalent' for equivalent questions or 'not_equivalent' for different ones AMsRIKZniY. ",
"transfer_ori_acc": 88.6,
"transfer_atk_acc": 88.5
},
{
"dataset": "rte",
"type": "task",
"origin_acc": 43.68,
"origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.",
"atk_acc": 7.22,
"atk_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment' mzjVogkRhM. ",
"transfer_ori_acc": 92.7797833935018,
"transfer_atk_acc": 92.7797833935018
},
{
"dataset": "rte",
"type": "role",
"origin_acc": 38.63,
"origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment'.",
"atk_acc": 0.72,
"atk_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment' Q47Jmd4lMV. ",
"transfer_ori_acc": 92.7797833935018,
"transfer_atk_acc": 92.7797833935018
},
{
"dataset": "sst2",
"type": "task",
"origin_acc": 14.45,
"origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment.",
"atk_acc": 0.11,
"atk_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment w52rwgo0Av. ",
"transfer_ori_acc": 96.78899082568807,
"transfer_atk_acc": 96.67431192660551
},
{
"dataset": "sst2",
"type": "role",
"origin_acc": 26.38,
"origin_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement.",
"atk_acc": 2.75,
"atk_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement Q47Jmd4lMV. ",
"transfer_ori_acc": 96.67431192660551,
"transfer_atk_acc": 96.78899082568807
},
{
"dataset": "wnli",
"type": "task",
"origin_acc": 28.17,
"origin_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment'.",
"atk_acc": 8.45,
"atk_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment' rzu98kF2KF. ",
"transfer_ori_acc": 77.46478873239437,
"transfer_atk_acc": 77.46478873239437
},
{
"dataset": "wnli",
"type": "role",
"origin_acc": 16.9,
"origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment'.",
"atk_acc": 2.82,
"atk_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment' rzu98kF2KF. ",
"transfer_ori_acc": 76.05633802816901,
"transfer_atk_acc": 76.05633802816901
},
{
"dataset": "mmlu",
"type": "task",
"origin_acc": 10.15,
"origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.",
"atk_acc": 5.02,
"atk_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D' OB4KVJzIft. ",
"transfer_ori_acc": 53.54609929078015,
"transfer_atk_acc": 53.90070921985816
},
{
"dataset": "mmlu",
"type": "role",
"origin_acc": 9.45,
"origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.",
"atk_acc": 4.43,
"atk_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D' w52rwgo0Av.",
"transfer_ori_acc": 53.01418439716312,
"transfer_atk_acc": 52.659574468085104
}
]