Spaces:

March07
/

PromptBench

Runtime error

App Files Files Community

PromptBench / results_transfer /chatgpt_google-flan-ul2 /translation_0_shot.json

March07

add transferability information

683d0f6 about 1 year ago

raw

history blame

5.94 kB

	[
	{
	"dataset": "cola",
	"type": "translation",
	"origin_acc": 79.5,
	"origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable':",
	"atk_acc": 25.0,
	"atk_prompt": "Classify the grammar of a given sentence into two categories, 'acceptable' or 'unacceptable', and judge its accuracy.",
	"transfer_ori_acc": 85.9,
	"transfer_atk_acc": 84.8
	},
	{
	"dataset": "mrpc",
	"type": "translation",
	"origin_acc": 76.5,
	"origin_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent'.",
	"atk_acc": 7.0,
	"atk_prompt": "As an expert in semantic comparison, assess the similarity between the following two sentences and determine whether they are 'equivalent' or 'not_equivalent'.",
	"transfer_ori_acc": 87.00980392156863,
	"transfer_atk_acc": 88.97058823529412
	},
	{
	"dataset": "qnli",
	"type": "translation",
	"origin_acc": 79.5,
	"origin_prompt": "Assess if the answer to the question can be logically concluded from the provided context by choosing 'entailment' or 'not_entailment'.",
	"atk_acc": 56.5,
	"atk_prompt": "Check the text link to the question and answer 'entailment' or 'not_entailment', depending on your language skills.",
	"transfer_ori_acc": 94.0,
	"transfer_atk_acc": 93.8
	},
	{
	"dataset": "qqp",
	"type": "translation",
	"origin_acc": 84.0,
	"origin_prompt": "Determine if the given pair of statements can be considered the same by responding with 'equivalent' or 'not_equivalent'.",
	"atk_acc": 3.0,
	"atk_prompt": "As a tool for determining the equivalence of problems, the problems are examined and their similarity is classified as 'equivalent' or 'not_equivalent'.",
	"transfer_ori_acc": 88.1,
	"transfer_atk_acc": 87.3
	},
	{
	"dataset": "rte",
	"type": "translation",
	"origin_acc": 84.0,
	"origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.",
	"atk_acc": 39.0,
	"atk_prompt": "Determine whether the given sentences have a cause-effect relationship as an engagement analysis tool and categorize them as 'entailment' or 'not_entailment'.",
	"transfer_ori_acc": 93.50180505415162,
	"transfer_atk_acc": 92.05776173285199
	},
	{
	"dataset": "sst2",
	"type": "translation",
	"origin_acc": 96.0,
	"origin_prompt": "Read the provided excerpt and choose between 'positive' and 'negative' to describe its sentiment:",
	"atk_acc": 29.5,
	"atk_prompt": "In the context of textual analysis, what classification is this phrase between 'positive' and 'negative'?",
	"transfer_ori_acc": 96.44495412844036,
	"transfer_atk_acc": 96.3302752293578
	},
	{
	"dataset": "wnli",
	"type": "translation",
	"origin_acc": 67.61,
	"origin_prompt": "Examine the pair of sentences and determine if they exhibit entailment or not_entailment. Answer with either 'entailment' or 'not_entailment'.",
	"atk_acc": 26.76,
	"atk_prompt": "Using implication analysis, determine whether the given sentences have a cause-effect relationship and categorize them as 'entailment' or 'not_entailment'.",
	"transfer_ori_acc": 78.87323943661971,
	"transfer_atk_acc": 76.05633802816901
	},
	{
	"dataset": "mmlu",
	"type": "translation",
	"origin_acc": 64.0,
	"origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.",
	"atk_acc": 49.5,
	"atk_prompt": "Choose the correct answer to the multiple-choice question below. Is '{}' an 'A', 'B', 'C', or 'D'.",
	"transfer_ori_acc": 51.95035460992907,
	"transfer_atk_acc": 36.52482269503546
	},
	{
	"dataset": "squad_v2",
	"type": "translation",
	"origin_acc": 72.37,
	"origin_prompt": "Discover the best answer based on the context. If the context doesn't include an answer, respond with 'unanswerable'.",
	"atk_acc": 46.08,
	"atk_prompt": "Please tell me what your question is about. If there is no context in which you can provide an answer, respond with 'unanswerable'.",
	"transfer_ori_acc": 89.38229949874687,
	"transfer_atk_acc": 90.04896616541353
	},
	{
	"dataset": "iwslt",
	"type": "translation",
	"origin_acc": 0.22,
	"origin_prompt": "Using machine translation, convert the given sentence from {} into the {} language:",
	"atk_acc": 0.11,
	"atk_prompt": "Please convert the sentence below to {}, and then translate it to {},",
	"transfer_ori_acc": 0.17087237648129655,
	"transfer_atk_acc": 0.16364025086187875
	},
	{
	"dataset": "un_multi",
	"type": "translation",
	"origin_acc": 0.34,
	"origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}:",
	"atk_acc": 0.16,
	"atk_prompt": "Please convert the sentence below to {}, and then translate it to {},",
	"transfer_ori_acc": 0.3102455120610509,
	"transfer_atk_acc": 0.3111101089578496
	},
	{
	"dataset": "math",
	"type": "translation",
	"origin_acc": 6.88,
	"origin_prompt": "Calculate the answer to the following math query about {}:",
	"atk_acc": 3.75,
	"atk_prompt": "Use {} to solve the following mathematical problem.",
	"transfer_ori_acc": 14.374999999999998,
	"transfer_atk_acc": 13.750000000000002
	}
	]