Spaces:

March07
/

PromptBench

Runtime error

App Files Files Community

PromptBench / results_transfer /vicuna-13b_google-flan-ul2 /textfooler_0_shot.json

March07

add transferability information

683d0f6 10 months ago

raw history blame contribute delete

No virus

8.3 kB

	[
	{
	"dataset": "cola",
	"type": "task",
	"origin_acc": 69.0,
	"origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':",
	"atk_acc": 14.0,
	"atk_prompt": "Assess the next sentence and decide if it is grammatically correct. Accommodate with 'Acceptable' or 'Unacceptable':",
	"transfer_ori_acc": 86.2,
	"transfer_atk_acc": 85.3
	},
	{
	"dataset": "cola",
	"type": "role",
	"origin_acc": 54.9,
	"origin_prompt": "As a tool for determining grammatical correctness, review the sentence and categorize its grammar as either 'acceptable' or 'unacceptable':",
	"atk_acc": 0.0,
	"atk_prompt": "Oj a toolbox for defines pronouns correcting, revisions the penalties and sorting its dictionary as either 'acceptable' or 'unacceptable':",
	"transfer_ori_acc": 85.7,
	"transfer_atk_acc": 83.7
	},
	{
	"dataset": "mrpc",
	"type": "task",
	"origin_acc": 67.16,
	"origin_prompt": "Are the meanings of the following pair of sentences the same? Answer with 'equivalent' or 'not_equivalent'.",
	"atk_acc": 0.49,
	"atk_prompt": "Are the meanings of the following couple of sentences the same? Cope with 'equivalent' or 'not_equivalent'. ",
	"transfer_ori_acc": 87.74509803921569,
	"transfer_atk_acc": 87.25490196078431
	},
	{
	"dataset": "mrpc",
	"type": "role",
	"origin_acc": 39.46,
	"origin_prompt": "In the role of a textual comparison specialist, examine the given sentences and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.",
	"atk_acc": 0.0,
	"atk_prompt": "About the role of a textual comparison specialist, examine the given judgements and decide if they exchanging the same meaning, responses with 'equivalent' or 'not_equivalent'. ",
	"transfer_ori_acc": 87.25490196078431,
	"transfer_atk_acc": 84.31372549019608
	},
	{
	"dataset": "qnli",
	"type": "task",
	"origin_acc": 30.5,
	"origin_prompt": "Review the given context and question, and decide if the context contains enough information to support the answer by selecting 'entailment' or 'not_entailment'.",
	"atk_acc": 0.0,
	"atk_prompt": "Reconsidered the given context and question, and ascertain if the context comprises enough information to favour the answer by choices 'entailment' or 'not_entailment'. ",
	"transfer_ori_acc": 94.1,
	"transfer_atk_acc": 94.39999999999999
	},
	{
	"dataset": "qnli",
	"type": "role",
	"origin_acc": 35.3,
	"origin_prompt": "As a language expert, assess if the given context entails the answer to the question and respond with 'entailment' or 'not_entailment'.",
	"atk_acc": 0.6,
	"atk_prompt": "As a language expert, weighed if the delivered framework translates the answer to the question and respondent with 'entailment' or 'not_entailment'. ",
	"transfer_ori_acc": 94.19999999999999,
	"transfer_atk_acc": 94.19999999999999
	},
	{
	"dataset": "qqp",
	"type": "task",
	"origin_acc": 23.9,
	"origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.",
	"atk_acc": 0.0,
	"atk_prompt": "Kan these two languages happen revisited equitable in signify? Meet with 'equivalent' or 'not_equivalent'. ",
	"transfer_ori_acc": 87.4,
	"transfer_atk_acc": 87.6
	},
	{
	"dataset": "qqp",
	"type": "role",
	"origin_acc": 16.9,
	"origin_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions.",
	"atk_acc": 0.1,
	"atk_prompt": "Onto the role of a question similarity analyzer, consider the significance of the envisaged matters and identify if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for many matters. ",
	"transfer_ori_acc": 88.1,
	"transfer_atk_acc": 87.9
	},
	{
	"dataset": "rte",
	"type": "task",
	"origin_acc": 47.65,
	"origin_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment'.",
	"atk_acc": 0.72,
	"atk_prompt": "Invite classed the coitus between the foreseen priors as 'entailment' or 'not_entailment'.",
	"transfer_ori_acc": 93.14079422382672,
	"transfer_atk_acc": 91.33574007220217
	},
	{
	"dataset": "rte",
	"type": "role",
	"origin_acc": 48.01,
	"origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment':",
	"atk_acc": 0.0,
	"atk_prompt": "Because an entailment characterization regimes, considering the login between the following sentence and meet with 'entailment' or 'not_entailment':",
	"transfer_ori_acc": 92.7797833935018,
	"transfer_atk_acc": 93.14079422382672
	},
	{
	"dataset": "sst2",
	"type": "task",
	"origin_acc": 53.9,
	"origin_prompt": "Please identify the emotional tone of this passage: 'positive' or 'negative'?",
	"atk_acc": 5.85,
	"atk_prompt": "Invited specify the affective tone of this pass: 'positive' or 'negative'? ",
	"transfer_ori_acc": 96.10091743119266,
	"transfer_atk_acc": 96.10091743119266
	},
	{
	"dataset": "sst2",
	"type": "role",
	"origin_acc": 42.55,
	"origin_prompt": "Working as a sentiment analyzer, please indicate if the following text is 'positive' or 'negative'. Classify:",
	"atk_acc": 3.21,
	"atk_prompt": "Working as a sentiment analyzer, please indicate if the following text is 'positive' or 'negative'. Ranked: ",
	"transfer_ori_acc": 96.44495412844036,
	"transfer_atk_acc": 96.3302752293578
	},
	{
	"dataset": "wnli",
	"type": "task",
	"origin_acc": 43.66,
	"origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.",
	"atk_acc": 1.41,
	"atk_prompt": "Define if the given pair of priors exhibits entailment or not_entailment. Answers with 'entailment' or 'not_entailment'.",
	"transfer_ori_acc": 78.87323943661971,
	"transfer_atk_acc": 77.46478873239437
	},
	{
	"dataset": "wnli",
	"type": "role",
	"origin_acc": 43.66,
	"origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment':",
	"atk_acc": 0.0,
	"atk_prompt": "Onto your role as an entailment analysing tool, appraisals the relationship between the given condemnation and classed it as 'entailment' or 'not_entailment':",
	"transfer_ori_acc": 78.87323943661971,
	"transfer_atk_acc": 78.87323943661971
	},
	{
	"dataset": "mmlu",
	"type": "task",
	"origin_acc": 27.07,
	"origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.",
	"atk_acc": 12.72,
	"atk_prompt": "Among all multiple-choice subject about {}, find the accurate answer by chooses 'A', 'B', 'C', or 'D'. ",
	"transfer_ori_acc": 51.77304964539007,
	"transfer_atk_acc": 52.4822695035461
	},
	{
	"dataset": "mmlu",
	"type": "role",
	"origin_acc": 24.85,
	"origin_prompt": "As an expert in {}, respond to the following multiple-choice question by selecting 'A', 'B', 'C', or 'D'.",
	"atk_acc": 13.89,
	"atk_prompt": "As an expert in {}, answering to the following multiple-choice things by picking 'A', 'B', 'C', or 'D'.",
	"transfer_ori_acc": 53.191489361702125,
	"transfer_atk_acc": 52.304964539007095
	}
	]