Spaces:

March07
/

PromptBench

Running

App Files Files Community

PromptBench / results_transfer /vicuna-13b_google-flan-ul2 /bertattack_0_shot.json

March07

add transferability information

683d0f6 about 1 year ago

raw

history blame contribute delete

No virus

7.9 kB

	[
	{
	"dataset": "cola",
	"type": "task",
	"origin_acc": 69.0,
	"origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':",
	"atk_acc": 1.7,
	"atk_prompt": "check the below verb and consider if it is grammatically complete. act with 'Acceptable' or 'Unacceptable':",
	"transfer_ori_acc": 86.2,
	"transfer_atk_acc": 80.0
	},
	{
	"dataset": "cola",
	"type": "role",
	"origin_acc": 57.1,
	"origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable':",
	"atk_acc": 0.6,
	"atk_prompt": "as as a grammar evaluation resource, analyze the next phrase and decide if it is grammatical+ correct, respond with 'acceptable' or 'unacceptable':",
	"transfer_ori_acc": 85.9,
	"transfer_atk_acc": 86.0
	},
	{
	"dataset": "mrpc",
	"type": "task",
	"origin_acc": 67.16,
	"origin_prompt": "Are the meanings of the following pair of sentences the same? Answer with 'equivalent' or 'not_equivalent'.",
	"atk_acc": 0.0,
	"atk_prompt": "Are the meanings of the next in of sentences the same? q with 'equivalent' or 'not_equivalent'. ",
	"transfer_ori_acc": 87.74509803921569,
	"transfer_atk_acc": 87.25490196078431
	},
	{
	"dataset": "mrpc",
	"type": "role",
	"origin_acc": 52.21,
	"origin_prompt": "In your capacity as a semantic comparison tool, examine the given sentences and decide if their meanings are identical, responding with 'equivalent' or 'not_equivalent'.",
	"atk_acc": 0.0,
	"atk_prompt": "[UNK] your capacity as a semantic comparison tool, examine the given sentences and decide if their meanings are different, responding with 'equivalent' or 'not_equivalent'. ",
	"transfer_ori_acc": 87.00980392156863,
	"transfer_atk_acc": 83.57843137254902
	},
	{
	"dataset": "qnli",
	"type": "task",
	"origin_acc": 30.5,
	"origin_prompt": "Review the given context and question, and decide if the context contains enough information to support the answer by selecting 'entailment' or 'not_entailment'.",
	"atk_acc": 0.0,
	"atk_prompt": "Review the given context and question, and decide if the context requires enough information to let the answer by choices 'entailment' or 'not_entailment'. ",
	"transfer_ori_acc": 94.1,
	"transfer_atk_acc": 94.19999999999999
	},
	{
	"dataset": "qnli",
	"type": "role",
	"origin_acc": 38.4,
	"origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.",
	"atk_acc": 0.4,
	"atk_prompt": "example a textual in expert, analyzing if the answer to the question can be deduced from the language context and think 'entailment' or 'not_entailment'. ",
	"transfer_ori_acc": 93.30000000000001,
	"transfer_atk_acc": 93.30000000000001
	},
	{
	"dataset": "qqp",
	"type": "task",
	"origin_acc": 23.9,
	"origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.",
	"atk_acc": 0.8,
	"atk_prompt": "can these two symbols be considered equal in meaning? object with 'equivalent' or 'not_equivalent'. ",
	"transfer_ori_acc": 87.4,
	"transfer_atk_acc": 87.6
	},
	{
	"dataset": "qqp",
	"type": "role",
	"origin_acc": 0,
	"origin_prompt": "",
	"atk_acc": 100,
	"atk_prompt": "",
	"transfer_ori_acc": 0.0,
	"transfer_atk_acc": 0.0
	},
	{
	"dataset": "rte",
	"type": "task",
	"origin_acc": 47.65,
	"origin_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment'.",
	"atk_acc": 0.0,
	"atk_prompt": "Please approach the relationship between the included sentences as 'entailment' or 'not_entailment'.",
	"transfer_ori_acc": 93.14079422382672,
	"transfer_atk_acc": 92.7797833935018
	},
	{
	"dataset": "rte",
	"type": "role",
	"origin_acc": 48.01,
	"origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment':",
	"atk_acc": 1.44,
	"atk_prompt": "she an entailment identification system, presented the connection between the following sentences and answered with 'entailment' or 'not_entailment':",
	"transfer_ori_acc": 92.7797833935018,
	"transfer_atk_acc": 92.4187725631769
	},
	{
	"dataset": "sst2",
	"type": "task",
	"origin_acc": 48.17,
	"origin_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative':",
	"atk_acc": 0.57,
	"atk_prompt": "After examination the given expression, type its emotion as of 'positive' or 'negative': ",
	"transfer_ori_acc": 91.97247706422019,
	"transfer_atk_acc": 92.88990825688074
	},
	{
	"dataset": "sst2",
	"type": "role",
	"origin_acc": 37.27,
	"origin_prompt": "As an emotion detector, determine if the provided passage conveys a 'positive' or 'negative' sentiment. Classify:",
	"atk_acc": 0.46,
	"atk_prompt": "As an emotion indicating, score if the provided reply conveys a 'positive' or 'negative' sentiment. classify: ",
	"transfer_ori_acc": 95.87155963302753,
	"transfer_atk_acc": 95.06880733944955
	},
	{
	"dataset": "wnli",
	"type": "task",
	"origin_acc": 43.66,
	"origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.",
	"atk_acc": 0.0,
	"atk_prompt": "Determine if the first pair of sentences demonstrate entailment or not_entailment. replied with 'entailment' or 'not_entailment'.",
	"transfer_ori_acc": 78.87323943661971,
	"transfer_atk_acc": 78.87323943661971
	},
	{
	"dataset": "wnli",
	"type": "role",
	"origin_acc": 43.66,
	"origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment':",
	"atk_acc": 0.0,
	"atk_prompt": "In your role as an entailment analysis tool, assessing the relationship between the formed sentences and classify it as 'entailment' or 'not_entailment':",
	"transfer_ori_acc": 78.87323943661971,
	"transfer_atk_acc": 78.87323943661971
	},
	{
	"dataset": "mmlu",
	"type": "task",
	"origin_acc": 25.09,
	"origin_prompt": "Answer the subsequent multiple-choice question about {} by picking the right option among 'A', 'B', 'C', or 'D'.",
	"atk_acc": 5.02,
	"atk_prompt": "Answer the next multiple-s question about {} by picking the incorrect button or 'A', 'B', 'C', or 'D'. ",
	"transfer_ori_acc": 52.4822695035461,
	"transfer_atk_acc": 15.602836879432624
	},
	{
	"dataset": "mmlu",
	"type": "role",
	"origin_acc": 24.85,
	"origin_prompt": "As an expert in {}, respond to the following multiple-choice question by selecting 'A', 'B', 'C', or 'D'.",
	"atk_acc": 6.88,
	"atk_prompt": "As an expert in {}, answer to the following multiple-guessing contestant by guessing 'A', 'B', 'C', or 'D'.",
	"transfer_ori_acc": 53.191489361702125,
	"transfer_atk_acc": 51.41843971631206
	}
	]