Spaces:
Runtime error
Runtime error
[ | |
{ | |
"dataset": "cola", | |
"type": "translation", | |
"origin_acc": 79.5, | |
"origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable':", | |
"atk_acc": 25.0, | |
"atk_prompt": "Classify the grammar of a given sentence into two categories, 'acceptable' or 'unacceptable', and judge its accuracy.", | |
"transfer_ori_acc": 76.3, | |
"transfer_atk_acc": 75.3 | |
}, | |
{ | |
"dataset": "mrpc", | |
"type": "translation", | |
"origin_acc": 76.5, | |
"origin_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent'.", | |
"atk_acc": 7.0, | |
"atk_prompt": "As an expert in semantic comparison, assess the similarity between the following two sentences and determine whether they are 'equivalent' or 'not_equivalent'.", | |
"transfer_ori_acc": 80.63725490196079, | |
"transfer_atk_acc": 83.82352941176471 | |
}, | |
{ | |
"dataset": "qnli", | |
"type": "translation", | |
"origin_acc": 79.5, | |
"origin_prompt": "Assess if the answer to the question can be logically concluded from the provided context by choosing 'entailment' or 'not_entailment'.", | |
"atk_acc": 56.5, | |
"atk_prompt": "Check the text link to the question and answer 'entailment' or 'not_entailment', depending on your language skills.", | |
"transfer_ori_acc": 84.8, | |
"transfer_atk_acc": 56.3 | |
}, | |
{ | |
"dataset": "qqp", | |
"type": "translation", | |
"origin_acc": 84.0, | |
"origin_prompt": "Determine if the given pair of statements can be considered the same by responding with 'equivalent' or 'not_equivalent'.", | |
"atk_acc": 3.0, | |
"atk_prompt": "As a tool for determining the equivalence of problems, the problems are examined and their similarity is classified as 'equivalent' or 'not_equivalent'.", | |
"transfer_ori_acc": 87.6, | |
"transfer_atk_acc": 72.0 | |
}, | |
{ | |
"dataset": "rte", | |
"type": "translation", | |
"origin_acc": 84.0, | |
"origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", | |
"atk_acc": 39.0, | |
"atk_prompt": "Determine whether the given sentences have a cause-effect relationship as an engagement analysis tool and categorize them as 'entailment' or 'not_entailment'.", | |
"transfer_ori_acc": 84.11552346570397, | |
"transfer_atk_acc": 85.5595667870036 | |
}, | |
{ | |
"dataset": "sst2", | |
"type": "translation", | |
"origin_acc": 96.0, | |
"origin_prompt": "Read the provided excerpt and choose between 'positive' and 'negative' to describe its sentiment:", | |
"atk_acc": 29.5, | |
"atk_prompt": "In the context of textual analysis, what classification is this phrase between 'positive' and 'negative'?", | |
"transfer_ori_acc": 94.83944954128441, | |
"transfer_atk_acc": 94.15137614678899 | |
}, | |
{ | |
"dataset": "wnli", | |
"type": "translation", | |
"origin_acc": 67.61, | |
"origin_prompt": "Examine the pair of sentences and determine if they exhibit entailment or not_entailment. Answer with either 'entailment' or 'not_entailment'.", | |
"atk_acc": 26.76, | |
"atk_prompt": "Using implication analysis, determine whether the given sentences have a cause-effect relationship and categorize them as 'entailment' or 'not_entailment'.", | |
"transfer_ori_acc": 61.97183098591549, | |
"transfer_atk_acc": 59.154929577464785 | |
}, | |
{ | |
"dataset": "mmlu", | |
"type": "translation", | |
"origin_acc": 64.0, | |
"origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.", | |
"atk_acc": 49.5, | |
"atk_prompt": "Choose the correct answer to the multiple-choice question below. Is '{}' an 'A', 'B', 'C', or 'D'.", | |
"transfer_ori_acc": 44.858156028368796, | |
"transfer_atk_acc": 43.262411347517734 | |
}, | |
{ | |
"dataset": "squad_v2", | |
"type": "translation", | |
"origin_acc": 72.37, | |
"origin_prompt": "Discover the best answer based on the context. If the context doesn't include an answer, respond with 'unanswerable'.", | |
"atk_acc": 46.08, | |
"atk_prompt": "Please tell me what your question is about. If there is no context in which you can provide an answer, respond with 'unanswerable'.", | |
"transfer_ori_acc": 87.54992770387507, | |
"transfer_atk_acc": 87.42265497660236 | |
}, | |
{ | |
"dataset": "iwslt", | |
"type": "translation", | |
"origin_acc": 0.22, | |
"origin_prompt": "Using machine translation, convert the given sentence from {} into the {} language:", | |
"atk_acc": 0.11, | |
"atk_prompt": "Please convert the sentence below to {}, and then translate it to {},", | |
"transfer_ori_acc": 0.13366423747896072, | |
"transfer_atk_acc": 0.13525124600739777 | |
}, | |
{ | |
"dataset": "un_multi", | |
"type": "translation", | |
"origin_acc": 0.34, | |
"origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}:", | |
"atk_acc": 0.16, | |
"atk_prompt": "Please convert the sentence below to {}, and then translate it to {},", | |
"transfer_ori_acc": 0.26814405340158265, | |
"transfer_atk_acc": 0.2698370828666449 | |
}, | |
{ | |
"dataset": "math", | |
"type": "translation", | |
"origin_acc": 6.88, | |
"origin_prompt": "Calculate the answer to the following math query about {}:", | |
"atk_acc": 3.75, | |
"atk_prompt": "Use {} to solve the following mathematical problem.", | |
"transfer_ori_acc": 13.125, | |
"transfer_atk_acc": 15.625 | |
} | |
] |