Spaces:

March07
/

PromptBench

Runtime error

App Files Files Community

PromptBench / results_transfer /chatgpt_google-flan-t5-large /bertattack_0_shot.json

March07

add transferability information

683d0f6 over 1 year ago

raw

history blame

12.2 kB

	[
	{
	"dataset": "cola",
	"type": "task",
	"origin_acc": 78.5,
	"origin_prompt": "Assess the grammatical structure of the given sentence and classify it as 'Acceptable' or 'Unacceptable':",
	"atk_acc": 0.0,
	"atk_prompt": "what the grammatical structure of the given sentence and consider it as 'Acceptable' or 'Unacceptable':",
	"transfer_ori_acc": 75.5,
	"transfer_atk_acc": 76.3
	},
	{
	"dataset": "cola",
	"type": "role",
	"origin_acc": 80.0,
	"origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:",
	"atk_acc": 0.0,
	"atk_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for so grammar or 'unacceptable' for abnormal grammar:",
	"transfer_ori_acc": 75.5,
	"transfer_atk_acc": 73.3
	},
	{
	"dataset": "mrpc",
	"type": "task",
	"origin_acc": 75.5,
	"origin_prompt": "Examine the pair of sentences and decide if their meanings are identical by answering with 'equivalent' or 'not_equivalent'.",
	"atk_acc": 56.0,
	"atk_prompt": "Examine the pairs of pronouns and evaluate if their meanings are different by answering with 'equivalent' or 'not_equivalent'. ",
	"transfer_ori_acc": 82.1078431372549,
	"transfer_atk_acc": 71.81372549019608
	},
	{
	"dataset": "mrpc",
	"type": "role",
	"origin_acc": 75.5,
	"origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.",
	"atk_acc": 0.0,
	"atk_prompt": "In your capacity as a semantic assessment tool, evaluate the provided tense and classify their constituents as 'equivalent' or 'not_equivalent'. ",
	"transfer_ori_acc": 76.47058823529412,
	"transfer_atk_acc": 79.41176470588235
	},
	{
	"dataset": "qnli",
	"type": "task",
	"origin_acc": 79.0,
	"origin_prompt": "Assess if the answer to the question can be logically concluded from the provided context by choosing 'entailment' or 'not_entailment'.",
	"atk_acc": 0.0,
	"atk_prompt": "and if the answer to the question can be logically concluded from the provided context by avoiding 'entailment' or 'not_entailment'. ",
	"transfer_ori_acc": 84.8,
	"transfer_atk_acc": 31.5
	},
	{
	"dataset": "qnli",
	"type": "role",
	"origin_acc": 79.5,
	"origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.",
	"atk_acc": 0.0,
	"atk_prompt": "As a textual inference expert, analysis if the answer to the question can be deduced from the provided citation and define 'entailment' or 'not_entailment'. ",
	"transfer_ori_acc": 78.10000000000001,
	"transfer_atk_acc": 60.5
	},
	{
	"dataset": "qqp",
	"type": "task",
	"origin_acc": 79.5,
	"origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.",
	"atk_acc": 1.0,
	"atk_prompt": "Can these two constructions be considered equal in meaning? object with 'equivalent' or 'not_equivalent'. ",
	"transfer_ori_acc": 87.8,
	"transfer_atk_acc": 68.89999999999999
	},
	{
	"dataset": "qqp",
	"type": "role",
	"origin_acc": 82.5,
	"origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.",
	"atk_acc": 0.0,
	"atk_prompt": "give an table for category comparison evaluation, consider the questions and determine if their concept is the same, responding with 'equivalent' for similarity questions or 'not_equivalent' for different knowledge. ",
	"transfer_ori_acc": 87.3,
	"transfer_atk_acc": 87.4
	},
	{
	"dataset": "rte",
	"type": "task",
	"origin_acc": 82.5,
	"origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.",
	"atk_acc": 55.5,
	"atk_prompt": "determine if the following singular of sentence indicating entailment or not_entailment. contrast with 'entailment' or 'not_entailment'.",
	"transfer_ori_acc": 84.11552346570397,
	"transfer_atk_acc": 82.31046931407943
	},
	{
	"dataset": "rte",
	"type": "role",
	"origin_acc": 81.5,
	"origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':",
	"atk_acc": 37.5,
	"atk_prompt": "at as an entailment classifier, know which the given type of verb displays entailment or not_entailment. object with 'entailment' or 'not_entailment':",
	"transfer_ori_acc": 83.03249097472924,
	"transfer_atk_acc": 81.2274368231047
	},
	{
	"dataset": "sst2",
	"type": "task",
	"origin_acc": 90.0,
	"origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment:",
	"atk_acc": 0.0,
	"atk_prompt": "what this assertion and are whether it has a 'positive' or 'negative' sentiment: ",
	"transfer_ori_acc": 95.06880733944955,
	"transfer_atk_acc": 94.95412844036697
	},
	{
	"dataset": "sst2",
	"type": "role",
	"origin_acc": 93.0,
	"origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:",
	"atk_acc": 76.0,
	"atk_prompt": "from as a sentiments identification tool, assess if the same word is 'positive' or 'negative'. function: ",
	"transfer_ori_acc": 94.83944954128441,
	"transfer_atk_acc": 93.92201834862385
	},
	{
	"dataset": "wnli",
	"type": "task",
	"origin_acc": 69.01,
	"origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.",
	"atk_acc": 47.89,
	"atk_prompt": "Identify whether the given individual of verbs demonstrates entailment or not_entailment. have with 'entailment' or 'not_entailment'.",
	"transfer_ori_acc": 59.154929577464785,
	"transfer_atk_acc": 52.112676056338024
	},
	{
	"dataset": "wnli",
	"type": "role",
	"origin_acc": 60.56,
	"origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':",
	"atk_acc": 47.89,
	"atk_prompt": "Working as an entailment classifier, identify whether the given corpus of verbs displays entailment or not_entailment. act with 'entailment' or 'not_entailment':",
	"transfer_ori_acc": 57.74647887323944,
	"transfer_atk_acc": 53.52112676056338
	},
	{
	"dataset": "mmlu",
	"type": "task",
	"origin_acc": 63.5,
	"origin_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D'.",
	"atk_acc": 14.5,
	"atk_prompt": "1 the first multiple-choice question on {}, remove the following answer from the options 'A', 'B', 'C', or 'D'. ",
	"transfer_ori_acc": 44.32624113475177,
	"transfer_atk_acc": 44.148936170212764
	},
	{
	"dataset": "mmlu",
	"type": "role",
	"origin_acc": 62.0,
	"origin_prompt": "Drawing upon your mastery of {}, please answer the multiple-choice question by selecting the correct option from 'A', 'B', 'C', or 'D'.",
	"atk_acc": 1.0,
	"atk_prompt": "Drawing upon your mastery of {}, simply question the multiple-selecting question by selecting the wrong color from 'A', 'B', 'C', or 'D'.",
	"transfer_ori_acc": 44.32624113475177,
	"transfer_atk_acc": 25.354609929078016
	},
	{
	"dataset": "squad_v2",
	"type": "task",
	"origin_acc": 72.47,
	"origin_prompt": "Discover the best answer based on the context. If the context doesn't include an answer, respond with 'unanswerable'.",
	"atk_acc": 2.76,
	"atk_prompt": "challenge the best answer based on the context. however the context doesn't include an answer, play with 'unanswerable'.",
	"transfer_ori_acc": 87.54992770387507,
	"transfer_atk_acc": 87.64992770387508
	},
	{
	"dataset": "squad_v2",
	"type": "role",
	"origin_acc": 65.63,
	"origin_prompt": "Drawing on your expertise in the context, provide the most precise answer. If the answer is not in the context, respond with 'unanswerable'.",
	"atk_acc": 1.86,
	"atk_prompt": "Drawing on your technique in the frame, offer the most precise answer. saying the answer is not in the context, play with 'unanswerable'.",
	"transfer_ori_acc": 88.04992770387507,
	"transfer_atk_acc": 87.92265497660236
	},
	{
	"dataset": "iwslt",
	"type": "task",
	"origin_acc": 0.22,
	"origin_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context:",
	"atk_acc": 0.08,
	"atk_prompt": "Translate the designated sentence from {} to {} while defending the assumed meaning and context:",
	"transfer_ori_acc": 0.12847010019593297,
	"transfer_atk_acc": 0.13300283959211967
	},
	{
	"dataset": "iwslt",
	"type": "role",
	"origin_acc": 0.22,
	"origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}:",
	"atk_acc": 0.01,
	"atk_prompt": "who on the role of a certified translated, request a translator of the defined sentence from {} to {}: ",
	"transfer_ori_acc": 0.14266566803921382,
	"transfer_atk_acc": 0.14227772806172803
	},
	{
	"dataset": "un_multi",
	"type": "task",
	"origin_acc": 0.34,
	"origin_prompt": "Translate the following text from the source language {} to the target language {}:",
	"atk_acc": 0.18,
	"atk_prompt": "Translate the by text from the source language {} to the english example {}:",
	"transfer_ori_acc": 0.2655722295717415,
	"transfer_atk_acc": 0.26924943953069924
	},
	{
	"dataset": "un_multi",
	"type": "role",
	"origin_acc": 0.34,
	"origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}:",
	"atk_acc": 0.08,
	"atk_prompt": "of the role of a language expertise, define a machine and for the presented sentence, changing it from {} to {}: ",
	"transfer_ori_acc": 0.26923226069781947,
	"transfer_atk_acc": 0.26037565925935324
	},
	{
	"dataset": "math",
	"type": "task",
	"origin_acc": 6.25,
	"origin_prompt": "Resolve the following mathematical question associated with {}:",
	"atk_acc": 0.62,
	"atk_prompt": "Resolve the following mathematical answer along with {}:",
	"transfer_ori_acc": 9.375,
	"transfer_atk_acc": 13.750000000000002
	},
	{
	"dataset": "math",
	"type": "role",
	"origin_acc": 5.0,
	"origin_prompt": "In the position of a mathematics coach, figure out the answer to this math challenge involving {}:",
	"atk_acc": 0.0,
	"atk_prompt": "In the position of a mathematics coach, develop out the theme to this math challenge involving {}: ",
	"transfer_ori_acc": 13.750000000000002,
	"transfer_atk_acc": 4.375
	}
	]