Spaces:
Runtime error
Runtime error
[ | |
{ | |
"dataset": "cola", | |
"type": "task", | |
"origin_acc": 69.0, | |
"origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':", | |
"atk_acc": 1.7, | |
"atk_prompt": "check the below verb and consider if it is grammatically complete. act with 'Acceptable' or 'Unacceptable':", | |
"transfer_ori_acc": 78.0, | |
"transfer_atk_acc": 42.6 | |
}, | |
{ | |
"dataset": "cola", | |
"type": "role", | |
"origin_acc": 57.1, | |
"origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable':", | |
"atk_acc": 0.6, | |
"atk_prompt": "as as a grammar evaluation resource, analyze the next phrase and decide if it is grammatical+ correct, respond with 'acceptable' or 'unacceptable':", | |
"transfer_ori_acc": 76.3, | |
"transfer_atk_acc": 75.8 | |
}, | |
{ | |
"dataset": "mrpc", | |
"type": "task", | |
"origin_acc": 67.16, | |
"origin_prompt": "Are the meanings of the following pair of sentences the same? Answer with 'equivalent' or 'not_equivalent'.", | |
"atk_acc": 0.0, | |
"atk_prompt": "Are the meanings of the next in of sentences the same? q with 'equivalent' or 'not_equivalent'. ", | |
"transfer_ori_acc": 82.1078431372549, | |
"transfer_atk_acc": 81.12745098039215 | |
}, | |
{ | |
"dataset": "mrpc", | |
"type": "role", | |
"origin_acc": 52.21, | |
"origin_prompt": "In your capacity as a semantic comparison tool, examine the given sentences and decide if their meanings are identical, responding with 'equivalent' or 'not_equivalent'.", | |
"atk_acc": 0.0, | |
"atk_prompt": "[UNK] your capacity as a semantic comparison tool, examine the given sentences and decide if their meanings are different, responding with 'equivalent' or 'not_equivalent'. ", | |
"transfer_ori_acc": 83.33333333333334, | |
"transfer_atk_acc": 80.3921568627451 | |
}, | |
{ | |
"dataset": "qnli", | |
"type": "task", | |
"origin_acc": 30.5, | |
"origin_prompt": "Review the given context and question, and decide if the context contains enough information to support the answer by selecting 'entailment' or 'not_entailment'.", | |
"atk_acc": 0.0, | |
"atk_prompt": "Review the given context and question, and decide if the context requires enough information to let the answer by choices 'entailment' or 'not_entailment'. ", | |
"transfer_ori_acc": 85.2, | |
"transfer_atk_acc": 77.3 | |
}, | |
{ | |
"dataset": "qnli", | |
"type": "role", | |
"origin_acc": 38.4, | |
"origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.", | |
"atk_acc": 0.4, | |
"atk_prompt": "example a textual in expert, analyzing if the answer to the question can be deduced from the language context and think 'entailment' or 'not_entailment'. ", | |
"transfer_ori_acc": 78.10000000000001, | |
"transfer_atk_acc": 65.10000000000001 | |
}, | |
{ | |
"dataset": "qqp", | |
"type": "task", | |
"origin_acc": 23.9, | |
"origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", | |
"atk_acc": 0.8, | |
"atk_prompt": "can these two symbols be considered equal in meaning? object with 'equivalent' or 'not_equivalent'. ", | |
"transfer_ori_acc": 87.8, | |
"transfer_atk_acc": 80.5 | |
}, | |
{ | |
"dataset": "qqp", | |
"type": "role", | |
"origin_acc": 0, | |
"origin_prompt": "", | |
"atk_acc": 100, | |
"atk_prompt": "", | |
"transfer_ori_acc": 0.0, | |
"transfer_atk_acc": 0.0 | |
}, | |
{ | |
"dataset": "rte", | |
"type": "task", | |
"origin_acc": 47.65, | |
"origin_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment'.", | |
"atk_acc": 0.0, | |
"atk_prompt": "Please approach the relationship between the included sentences as 'entailment' or 'not_entailment'.", | |
"transfer_ori_acc": 85.1985559566787, | |
"transfer_atk_acc": 84.47653429602889 | |
}, | |
{ | |
"dataset": "rte", | |
"type": "role", | |
"origin_acc": 48.01, | |
"origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment':", | |
"atk_acc": 1.44, | |
"atk_prompt": "she an entailment identification system, presented the connection between the following sentences and answered with 'entailment' or 'not_entailment':", | |
"transfer_ori_acc": 84.11552346570397, | |
"transfer_atk_acc": 83.39350180505414 | |
}, | |
{ | |
"dataset": "sst2", | |
"type": "task", | |
"origin_acc": 48.17, | |
"origin_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative':", | |
"atk_acc": 0.57, | |
"atk_prompt": "After examination the given expression, type its emotion as of 'positive' or 'negative': ", | |
"transfer_ori_acc": 93.23394495412845, | |
"transfer_atk_acc": 93.46330275229357 | |
}, | |
{ | |
"dataset": "sst2", | |
"type": "role", | |
"origin_acc": 37.27, | |
"origin_prompt": "As an emotion detector, determine if the provided passage conveys a 'positive' or 'negative' sentiment. Classify:", | |
"atk_acc": 0.46, | |
"atk_prompt": "As an emotion indicating, score if the provided reply conveys a 'positive' or 'negative' sentiment. classify: ", | |
"transfer_ori_acc": 94.83944954128441, | |
"transfer_atk_acc": 94.95412844036697 | |
}, | |
{ | |
"dataset": "wnli", | |
"type": "task", | |
"origin_acc": 43.66, | |
"origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", | |
"atk_acc": 0.0, | |
"atk_prompt": "Determine if the first pair of sentences demonstrate entailment or not_entailment. replied with 'entailment' or 'not_entailment'.", | |
"transfer_ori_acc": 61.97183098591549, | |
"transfer_atk_acc": 61.97183098591549 | |
}, | |
{ | |
"dataset": "wnli", | |
"type": "role", | |
"origin_acc": 43.66, | |
"origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment':", | |
"atk_acc": 0.0, | |
"atk_prompt": "In your role as an entailment analysis tool, assessing the relationship between the formed sentences and classify it as 'entailment' or 'not_entailment':", | |
"transfer_ori_acc": 56.33802816901409, | |
"transfer_atk_acc": 54.929577464788736 | |
}, | |
{ | |
"dataset": "mmlu", | |
"type": "task", | |
"origin_acc": 25.09, | |
"origin_prompt": "Answer the subsequent multiple-choice question about {} by picking the right option among 'A', 'B', 'C', or 'D'.", | |
"atk_acc": 5.02, | |
"atk_prompt": "Answer the next multiple-s question about {} by picking the incorrect button or 'A', 'B', 'C', or 'D'. ", | |
"transfer_ori_acc": 44.858156028368796, | |
"transfer_atk_acc": 24.645390070921984 | |
}, | |
{ | |
"dataset": "mmlu", | |
"type": "role", | |
"origin_acc": 24.85, | |
"origin_prompt": "As an expert in {}, respond to the following multiple-choice question by selecting 'A', 'B', 'C', or 'D'.", | |
"atk_acc": 6.88, | |
"atk_prompt": "As an expert in {}, answer to the following multiple-guessing contestant by guessing 'A', 'B', 'C', or 'D'.", | |
"transfer_ori_acc": 44.680851063829785, | |
"transfer_atk_acc": 44.858156028368796 | |
} | |
] |