diff --git a/app.py b/app.py index 7b30d8b2e293c746ba19bf854c4e91cf6e721709..5d39d663a32f94525ebf15d2945c2091fa68481a 100644 --- a/app.py +++ b/app.py @@ -1,9 +1,19 @@ -import streamlit as st +import streamlit as st from parse import retrieve +from transfer import retrieve_transfer - -def main(): - st.title("PromptBench") +def main(): + st.sidebar.title("Choose Function") + function_choice = st.sidebar.radio("", ["PromptBench", "Retrieve Transferability Information"]) + + if function_choice == "PromptBench": + promptbench() + + elif function_choice == "Retrieve Transferability Information": + retrieve_transferability_information() + +def promptbench(): + st.title("PromptBench") model_name = st.selectbox( "Select Model", @@ -47,5 +57,47 @@ def main(): st.write("Attack prompt: {}".format(result["attack prompt"])) st.write("Attack acc: {}".format(result["attack acc"])) -if __name__ == "__main__": - main() + +def retrieve_transferability_information(): + st.title("Retrieve Transferability Information") + source_model_name = st.selectbox( + "Select Source Model", + options=["T5", "Vicuna", "UL2", "ChatGPT"], + index=0, + ) + + target_model_name = st.selectbox( + "Select Target Model", + options=["T5", "Vicuna", "UL2", "ChatGPT"], + index=0, + ) + + if source_model_name == target_model_name: + st.write("Source model and target model cannot be the same.") + return + + attack_name = st.selectbox( + "Select Attack", + options=[ + "BertAttack", "CheckList", "DeepWordBug", "StressTest", "TextFooler", "TextBugger", "Semantic" + ], + index=0, + ) + + shot = st.selectbox( + "Select Shot", + options=[0, 3], + index=0, + ) + + data = retrieve_transfer(source_model_name, target_model_name, attack_name, shot) + for d in data: + st.write(f"Dataset: {d['dataset']}") + st.write(f"Prompt Type: {d['type']}-oriented") + st.write(f"Origin prompt: {d['origin_prompt']}") + st.write(f"Attack prompt: {d['atk_prompt']}") + st.write(f"Source model: origin acc: {d['origin_acc']}, attack acc: {d['atk_acc']}") + st.write(f"Target model: origin acc: {d['transfer_origin_acc']}, attack acc: {d['transfer_atk_acc']}") + +if __name__ == "__main__": + main() diff --git a/results_transfer/chatgpt_google-flan-t5-large/bertattack_0_shot.json b/results_transfer/chatgpt_google-flan-t5-large/bertattack_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..fe963bb29280fc71ffbcaaaf4b3a751c75086547 --- /dev/null +++ b/results_transfer/chatgpt_google-flan-t5-large/bertattack_0_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 78.5, + "origin_prompt": "Assess the grammatical structure of the given sentence and classify it as 'Acceptable' or 'Unacceptable':", + "atk_acc": 0.0, + "atk_prompt": "what the grammatical structure of the given sentence and consider it as 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 75.5, + "transfer_atk_acc": 76.3 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 80.0, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 0.0, + "atk_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for so grammar or 'unacceptable' for abnormal grammar:", + "transfer_ori_acc": 75.5, + "transfer_atk_acc": 73.3 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 75.5, + "origin_prompt": "Examine the pair of sentences and decide if their meanings are identical by answering with 'equivalent' or 'not_equivalent'.", + "atk_acc": 56.0, + "atk_prompt": "Examine the pairs of pronouns and evaluate if their meanings are different by answering with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 82.1078431372549, + "transfer_atk_acc": 71.81372549019608 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 75.5, + "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.0, + "atk_prompt": "In your capacity as a semantic assessment tool, evaluate the provided tense and classify their constituents as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 76.47058823529412, + "transfer_atk_acc": 79.41176470588235 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 79.0, + "origin_prompt": "Assess if the answer to the question can be logically concluded from the provided context by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "and if the answer to the question can be logically concluded from the provided context by avoiding 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 84.8, + "transfer_atk_acc": 31.5 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 79.5, + "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "As a textual inference expert, analysis if the answer to the question can be deduced from the provided citation and define 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 78.10000000000001, + "transfer_atk_acc": 60.5 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 79.5, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 1.0, + "atk_prompt": "Can these two constructions be considered equal in meaning? object with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.8, + "transfer_atk_acc": 68.89999999999999 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 82.5, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 0.0, + "atk_prompt": "give an table for category comparison evaluation, consider the questions and determine if their concept is the same, responding with 'equivalent' for similarity questions or 'not_equivalent' for different knowledge. ", + "transfer_ori_acc": 87.3, + "transfer_atk_acc": 87.4 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 82.5, + "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 55.5, + "atk_prompt": "determine if the following singular of sentence indicating entailment or not_entailment. contrast with 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 84.11552346570397, + "transfer_atk_acc": 82.31046931407943 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 81.5, + "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 37.5, + "atk_prompt": "at as an entailment classifier, know which the given type of verb displays entailment or not_entailment. object with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 83.03249097472924, + "transfer_atk_acc": 81.2274368231047 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 90.0, + "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment:", + "atk_acc": 0.0, + "atk_prompt": "what this assertion and are whether it has a 'positive' or 'negative' sentiment: ", + "transfer_ori_acc": 95.06880733944955, + "transfer_atk_acc": 94.95412844036697 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 93.0, + "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:", + "atk_acc": 76.0, + "atk_prompt": "from as a sentiments identification tool, assess if the same word is 'positive' or 'negative'. function: ", + "transfer_ori_acc": 94.83944954128441, + "transfer_atk_acc": 93.92201834862385 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 69.01, + "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 47.89, + "atk_prompt": "Identify whether the given individual of verbs demonstrates entailment or not_entailment. have with 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 59.154929577464785, + "transfer_atk_acc": 52.112676056338024 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 60.56, + "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 47.89, + "atk_prompt": "Working as an entailment classifier, identify whether the given corpus of verbs displays entailment or not_entailment. act with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 57.74647887323944, + "transfer_atk_acc": 53.52112676056338 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 63.5, + "origin_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D'.", + "atk_acc": 14.5, + "atk_prompt": "1 the first multiple-choice question on {}, remove the following answer from the options 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 44.32624113475177, + "transfer_atk_acc": 44.148936170212764 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 62.0, + "origin_prompt": "Drawing upon your mastery of {}, please answer the multiple-choice question by selecting the correct option from 'A', 'B', 'C', or 'D'.", + "atk_acc": 1.0, + "atk_prompt": "Drawing upon your mastery of {}, simply question the multiple-selecting question by selecting the wrong color from 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 44.32624113475177, + "transfer_atk_acc": 25.354609929078016 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 72.47, + "origin_prompt": "Discover the best answer based on the context. If the context doesn't include an answer, respond with 'unanswerable'.", + "atk_acc": 2.76, + "atk_prompt": "challenge the best answer based on the context. however the context doesn't include an answer, play with 'unanswerable'.", + "transfer_ori_acc": 87.54992770387507, + "transfer_atk_acc": 87.64992770387508 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 65.63, + "origin_prompt": "Drawing on your expertise in the context, provide the most precise answer. If the answer is not in the context, respond with 'unanswerable'.", + "atk_acc": 1.86, + "atk_prompt": "Drawing on your technique in the frame, offer the most precise answer. saying the answer is not in the context, play with 'unanswerable'.", + "transfer_ori_acc": 88.04992770387507, + "transfer_atk_acc": 87.92265497660236 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.22, + "origin_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context:", + "atk_acc": 0.08, + "atk_prompt": "Translate the designated sentence from {} to {} while defending the assumed meaning and context:", + "transfer_ori_acc": 0.12847010019593297, + "transfer_atk_acc": 0.13300283959211967 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.22, + "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}:", + "atk_acc": 0.01, + "atk_prompt": "who on the role of a certified translated, request a translator of the defined sentence from {} to {}: ", + "transfer_ori_acc": 0.14266566803921382, + "transfer_atk_acc": 0.14227772806172803 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.34, + "origin_prompt": "Translate the following text from the source language {} to the target language {}:", + "atk_acc": 0.18, + "atk_prompt": "Translate the by text from the source language {} to the english example {}:", + "transfer_ori_acc": 0.2655722295717415, + "transfer_atk_acc": 0.26924943953069924 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.34, + "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}:", + "atk_acc": 0.08, + "atk_prompt": "of the role of a language expertise, define a machine and for the presented sentence, changing it from {} to {}: ", + "transfer_ori_acc": 0.26923226069781947, + "transfer_atk_acc": 0.26037565925935324 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 6.25, + "origin_prompt": "Resolve the following mathematical question associated with {}:", + "atk_acc": 0.62, + "atk_prompt": "Resolve the following mathematical answer along with {}:", + "transfer_ori_acc": 9.375, + "transfer_atk_acc": 13.750000000000002 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 5.0, + "origin_prompt": "In the position of a mathematics coach, figure out the answer to this math challenge involving {}:", + "atk_acc": 0.0, + "atk_prompt": "In the position of a mathematics coach, develop out the theme to this math challenge involving {}: ", + "transfer_ori_acc": 13.750000000000002, + "transfer_atk_acc": 4.375 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_google-flan-t5-large/bertattack_3_shot.json b/results_transfer/chatgpt_google-flan-t5-large/bertattack_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..80234fc2964d04c99bd8f5bcb441d2a82b6abd8d --- /dev/null +++ b/results_transfer/chatgpt_google-flan-t5-large/bertattack_3_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 81.0, + "origin_prompt": "Analyze the provided sentence and classify its grammatical correctness as 'Acceptable' or 'Unacceptable'.", + "atk_acc": 59.0, + "atk_prompt": "Analyze the reported speech and classify its inclusive correcting as 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 77.10000000000001, + "transfer_atk_acc": 74.1 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 81.0, + "origin_prompt": "In the role of a syntax analyzer, examine the grammar of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones.", + "atk_acc": 75.5, + "atk_prompt": "using the art of a level analyzeg, examine the grammar of the provided sentence and decide if it is genuine, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones. ", + "transfer_ori_acc": 74.2, + "transfer_atk_acc": 74.0 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 74.0, + "origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 47.5, + "atk_prompt": "Compare the following sentences and determine if they lack the same aspect marking by respectively with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 80.3921568627451, + "transfer_atk_acc": 81.61764705882352 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 71.5, + "origin_prompt": "In the role of a textual comparison specialist, examine the given sentences and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 47.0, + "atk_prompt": "have the role of a textual comparison academic, examine the given sentences and determine if they share the same length, responding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 79.65686274509804, + "transfer_atk_acc": 76.22549019607843 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 84.0, + "origin_prompt": "Consider the context and question, and indicate if the answer can be logically deduced from the context by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 71.0, + "atk_prompt": "put the contexts and question, and be if the answer can be logically deduced from the question by stating with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 85.39999999999999, + "transfer_atk_acc": 78.9 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 84.0, + "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.", + "atk_acc": 70.0, + "atk_prompt": "As a structural inference expert, analyze if the answer to the challenge can be deduced from the query audience and select 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 77.2, + "transfer_atk_acc": 82.8 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 82.5, + "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", + "atk_acc": 78.5, + "atk_prompt": "Are the following versus answers equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\". ", + "transfer_ori_acc": 87.9, + "transfer_atk_acc": 87.7 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 83.5, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 78.0, + "atk_prompt": "and an instrument for question comparison analysis, consider the questions and establish if their topic is the same, responding with 'equivalent' for matching responses or 'not_equivalent' for between questions. ", + "transfer_ori_acc": 86.4, + "transfer_atk_acc": 86.5 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 85.0, + "origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.", + "atk_acc": 59.5, + "atk_prompt": "write the second statements and categorize their relationship as neither 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 84.83754512635379, + "transfer_atk_acc": 84.47653429602889 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 83.5, + "origin_prompt": "While performing entailment analysis, classify the relationship between the provided sentences as 'entailment' or 'not_entailment'.", + "atk_acc": 79.0, + "atk_prompt": "was performing entailment analysis, classify the relationship between the first phrase as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 83.39350180505414, + "transfer_atk_acc": 83.39350180505414 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 95.5, + "origin_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative'.", + "atk_acc": 94.5, + "atk_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative' Q47Jmd4lMV. ", + "transfer_ori_acc": 93.69266055045871, + "transfer_atk_acc": 93.92201834862385 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 97.5, + "origin_prompt": "In the capacity of a sentiment classifier, decide whether the given quote is 'positive' or 'negative'.", + "atk_acc": 96.5, + "atk_prompt": "In the capacity of a sentiment classifier, decide whether the given quote is 'positive' or 'negative' 9Q3S1F94fE. ", + "transfer_ori_acc": 94.95412844036697, + "transfer_atk_acc": 94.95412844036697 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 69.01, + "origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.", + "atk_acc": 59.15, + "atk_prompt": "examine the paired actions and categorize their relationship as is 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 63.38028169014085, + "transfer_atk_acc": 66.19718309859155 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 56.34, + "atk_prompt": "In the role of a semantic relationship analyzer, define the connection between the given links and decide if they display entailment or not_entailment. Answer with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 56.33802816901409, + "transfer_atk_acc": 66.19718309859155 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 66.5, + "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.", + "atk_acc": 11.5, + "atk_prompt": "at respond to the multiple-independent question about {} by selecting the inappropriate question: 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 45.21276595744681, + "transfer_atk_acc": 33.51063829787234 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 65.5, + "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 52.5, + "atk_prompt": "all your ability with {}, attend to the following multiple-select question by picking 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 46.98581560283688, + "transfer_atk_acc": 45.92198581560284 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 72.25, + "origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, reply with 'unanswerable'.", + "atk_acc": 65.81, + "atk_prompt": "Based on the use context, provide the best possible answer. who there'tis no answer provided in the area, reply with 'unanswerable'.", + "transfer_ori_acc": 87.04992770387507, + "transfer_atk_acc": 87.54992770387507 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 72.45, + "origin_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable'.", + "atk_acc": 61.02, + "atk_prompt": "As a proficient expert in the given context, try for the most specific out. If the question lacks be search, end by as 'unanswerable'.", + "transfer_ori_acc": 87.31659437054175, + "transfer_atk_acc": 87.68023073417811 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.24, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.", + "atk_acc": 0.21, + "atk_prompt": "Convert the following sentence from its actual {} language to the net language {}. ", + "transfer_ori_acc": 0.2240211126734209, + "transfer_atk_acc": 0.22296244610321242 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.24, + "origin_prompt": "Assuming the responsibilities of a professional translator, translate the subsequent text passage from {} to {}.", + "atk_acc": 0.17, + "atk_prompt": "what the responsibilities of a professional scholar, translate the subsequent text passage from {} to {}. ", + "transfer_ori_acc": 0.22030199520456395, + "transfer_atk_acc": 0.22001437607394084 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.36, + "origin_prompt": "Translate the following text from the source language {} to the target language {}.", + "atk_acc": 0.35, + "atk_prompt": "Translate the following area from the standard language {} to the target language {}. ", + "transfer_ori_acc": 0.3090347352409074, + "transfer_atk_acc": 0.30893566132252137 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.35, + "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}.", + "atk_acc": 0.26, + "atk_prompt": "As a qualified proper, converts the target sentence from its original {} language to the english language {}. ", + "transfer_ori_acc": 0.31127691224033316, + "transfer_atk_acc": 0.306608167162218 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 22.5, + "origin_prompt": "Compute the result of this math task concerning {}.", + "atk_acc": 3.12, + "atk_prompt": "Compute the degree of this math task concerning {}. ", + "transfer_ori_acc": 16.25, + "transfer_atk_acc": 16.875 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 22.5, + "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}.", + "atk_acc": 7.5, + "atk_prompt": "as a mathematics instructor, evaluate the reply to the following problem and to {}. ", + "transfer_ori_acc": 17.5, + "transfer_atk_acc": 16.25 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_google-flan-t5-large/checklist_0_shot.json b/results_transfer/chatgpt_google-flan-t5-large/checklist_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..1024b971455ba611ea6a56fe6aedbd9d208dc2e9 --- /dev/null +++ b/results_transfer/chatgpt_google-flan-t5-large/checklist_0_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 78.5, + "origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable':", + "atk_acc": 72.5, + "atk_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable' zexKUTWJYG:", + "transfer_ori_acc": 76.5, + "transfer_atk_acc": 75.5 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 79.0, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 45.0, + "atk_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar zexKUTWJYG:", + "transfer_ori_acc": 75.5, + "transfer_atk_acc": 74.1 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 75.5, + "origin_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 72.0, + "atk_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent' SvAp8RlOFn. ", + "transfer_ori_acc": 81.86274509803921, + "transfer_atk_acc": 82.35294117647058 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 75.5, + "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.", + "atk_acc": 73.5, + "atk_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent' Kw6nrs57gH. ", + "transfer_ori_acc": 76.47058823529412, + "transfer_atk_acc": 74.75490196078431 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 78.0, + "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 75.5, + "atk_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment' ItGDrrA1Sc. ", + "transfer_ori_acc": 90.4, + "transfer_atk_acc": 89.5 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 78.0, + "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", + "atk_acc": 74.5, + "atk_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment' 6a4Yn3RGVc. ", + "transfer_ori_acc": 89.1, + "transfer_atk_acc": 88.7 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 81.5, + "origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 78.0, + "atk_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent' iia2yL9Uzm. ", + "transfer_ori_acc": 87.6, + "transfer_atk_acc": 87.7 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 83.0, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 80.5, + "atk_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions Cev0s49fbe. ", + "transfer_ori_acc": 87.3, + "transfer_atk_acc": 87.5 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 82.5, + "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 80.0, + "atk_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment' MSAddJ4D2a.", + "transfer_ori_acc": 84.11552346570397, + "transfer_atk_acc": 84.11552346570397 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 81.5, + "origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 80.0, + "atk_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment' 6a4Yn3RGVc:", + "transfer_ori_acc": 85.5595667870036, + "transfer_atk_acc": 85.92057761732852 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 93.0, + "origin_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative':", + "atk_acc": 90.0, + "atk_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative' AMsRIKZniY: ", + "transfer_ori_acc": 94.26605504587155, + "transfer_atk_acc": 94.38073394495413 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 92.5, + "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:", + "atk_acc": 92.0, + "atk_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify Cev0s49fbe: ", + "transfer_ori_acc": 94.83944954128441, + "transfer_atk_acc": 94.83944954128441 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 64.79, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 59.15, + "atk_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word zexKUTWJYG. ", + "transfer_ori_acc": 66.19718309859155, + "transfer_atk_acc": 66.19718309859155 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 63.38, + "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':", + "atk_acc": 57.75, + "atk_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment' 5XeflW1ZJc:", + "transfer_ori_acc": 54.929577464788736, + "transfer_atk_acc": 53.52112676056338 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 64.0, + "origin_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D'.", + "atk_acc": 58.0, + "atk_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D' Pbl3tYuoRo. ", + "transfer_ori_acc": 44.32624113475177, + "transfer_atk_acc": 44.50354609929078 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 61.5, + "origin_prompt": "As a skilled person in the field of {}, please respond to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 58.0, + "atk_prompt": "As a skilled person in the field of {}, please respond to the multiple-choice question by choosing 'A', 'B', 'C', or 'D' IhuBIhoPGc.", + "transfer_ori_acc": 44.680851063829785, + "transfer_atk_acc": 45.0354609929078 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 69.16, + "origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, respond with 'unanswerable'.", + "atk_acc": 64.99, + "atk_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, respond with 'unanswerable' ItGDrrA1Sc.", + "transfer_ori_acc": 87.04992770387507, + "transfer_atk_acc": 87.54992770387507 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 64.31, + "origin_prompt": "As an expert with a deep understanding of the context, find the best answer. If the context doesn't include an answer, say 'unanswerable'.", + "atk_acc": 60.2, + "atk_prompt": "As an expert with a deep understanding of the context, find the best answer. If the context doesn't include an answer, say 'unanswerable' d6ZQ3u0GBQ.", + "transfer_ori_acc": 88.04992770387507, + "transfer_atk_acc": 88.04992770387507 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.22, + "origin_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context:", + "atk_acc": 0.21, + "atk_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context zq0DcZ5dnI:", + "transfer_ori_acc": 0.12847010019593297, + "transfer_atk_acc": 0.13452105947739143 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.22, + "origin_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to {}:", + "atk_acc": 0.21, + "atk_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to SFPCqpiClT {}: ", + "transfer_ori_acc": 0.13904950536673938, + "transfer_atk_acc": 0.12181412545498613 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.34, + "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}:", + "atk_acc": 0.33, + "atk_prompt": "Perform a machine translation for the provided sentence, changing it from {} to Kw6nrs57gH {}:", + "transfer_ori_acc": 0.26814405340158265, + "transfer_atk_acc": 0.26255854470098927 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.34, + "origin_prompt": "While acting as an experienced language expert, translate the following text from {} to {}:", + "atk_acc": 0.33, + "atk_prompt": "While acting as an experienced language expert, translate the following text from {} to Kw6nrs57gH {}: ", + "transfer_ori_acc": 0.26489202618745367, + "transfer_atk_acc": 0.25410191629878426 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 6.88, + "origin_prompt": "Calculate the answer to the following math query about {}:", + "atk_acc": 5.0, + "atk_prompt": "Calculate the answer to the following math query about 6a4Yn3RGVc {}:", + "transfer_ori_acc": 13.125, + "transfer_atk_acc": 11.875 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 5.0, + "origin_prompt": "Embodying a mathematics advisor, find the answer to the following math problem with {}:", + "atk_acc": 2.5, + "atk_prompt": "Embodying a mathematics advisor, find the answer to the following math problem with 9vGXgnbWB8 {}: ", + "transfer_ori_acc": 11.25, + "transfer_atk_acc": 12.5 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_google-flan-t5-large/checklist_3_shot.json b/results_transfer/chatgpt_google-flan-t5-large/checklist_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..a938d5a690316ef484a825d8378aa7494b64843a --- /dev/null +++ b/results_transfer/chatgpt_google-flan-t5-large/checklist_3_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 81.0, + "origin_prompt": "Examine the sentence and decide if its grammar is 'Acceptable' or 'Unacceptable'.", + "atk_acc": 78.0, + "atk_prompt": "Examine the sentence and decide if its grammar is 'Acceptable' or 'Unacceptable' 9Q3S1F94fE. ", + "transfer_ori_acc": 76.3, + "transfer_atk_acc": 76.6 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 81.5, + "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable'.", + "atk_acc": 79.5, + "atk_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable' 6a4Yn3RGVc. ", + "transfer_ori_acc": 75.2, + "transfer_atk_acc": 76.3 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 74.0, + "origin_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 68.5, + "atk_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent' LkF0FZxMZ4. ", + "transfer_ori_acc": 82.84313725490196, + "transfer_atk_acc": 82.59803921568627 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 72.0, + "origin_prompt": "As a linguistic comparator, review the following pair of sentences and determine their semantic equivalence by choosing 'equivalent' or 'not_equivalent'.", + "atk_acc": 68.0, + "atk_prompt": "As a linguistic comparator, review the following pair of sentences and determine their semantic equivalence by choosing 'equivalent' or 'not_equivalent' bhrRSokrfa. ", + "transfer_ori_acc": 79.90196078431373, + "transfer_atk_acc": 80.63725490196079 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 84.5, + "origin_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 80.5, + "atk_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment' 9Q3S1F94fE. ", + "transfer_ori_acc": 86.5, + "transfer_atk_acc": 86.5 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 84.5, + "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.", + "atk_acc": 81.0, + "atk_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment' YHQiDfFhw4. ", + "transfer_ori_acc": 77.2, + "transfer_atk_acc": 74.2 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 82.0, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 79.0, + "atk_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent' xikCjkMydH. ", + "transfer_ori_acc": 87.6, + "transfer_atk_acc": 87.8 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 83.0, + "origin_prompt": "In the capacity of a question assessment system, indicate if the meaning of the provided questions is the same, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 81.5, + "atk_prompt": "In the capacity of a question assessment system, indicate if the meaning of the provided questions is the same, responding with 'equivalent' or 'not_equivalent' 5XeflW1ZJc. ", + "transfer_ori_acc": 86.1, + "transfer_atk_acc": 85.9 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 84.5, + "origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.", + "atk_acc": 80.5, + "atk_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment' 5XeflW1ZJc. ", + "transfer_ori_acc": 84.83754512635379, + "transfer_atk_acc": 85.5595667870036 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.0, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 80.5, + "atk_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment' bhrRSokrfa. ", + "transfer_ori_acc": 83.03249097472924, + "transfer_atk_acc": 83.03249097472924 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.0, + "origin_prompt": "Read the provided excerpt and choose between 'positive' and 'negative' to describe its sentiment.", + "atk_acc": 77.5, + "atk_prompt": "go the read excerpt and be between 'positive' and 'negative' to describing its mood. ", + "transfer_ori_acc": 94.72477064220183, + "transfer_atk_acc": 94.61009174311926 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 97.5, + "origin_prompt": "In the capacity of a sentiment classifier, decide whether the given quote is 'positive' or 'negative'.", + "atk_acc": 13.5, + "atk_prompt": "in the capacity of a sentiment classizer, chose neither the said quotes is 'positive' or 'negative'. ", + "transfer_ori_acc": 94.95412844036697, + "transfer_atk_acc": 94.95412844036697 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 69.01, + "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 61.97, + "atk_prompt": "Indicate if the connection between the followping senZences is 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 64.7887323943662, + "transfer_atk_acc": 69.01408450704226 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 57.75, + "atk_prompt": "In your role as an entailment analysis tool, asress the rlationship between the given sentences and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 57.74647887323944, + "transfer_atk_acc": 54.929577464788736 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 66.0, + "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.", + "atk_acc": 63.0, + "atk_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D' MQnugHcaoy. ", + "transfer_ori_acc": 45.21276595744681, + "transfer_atk_acc": 45.21276595744681 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 65.5, + "origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 63.0, + "atk_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D' mOJ9nEwT4f.", + "transfer_ori_acc": 46.63120567375886, + "transfer_atk_acc": 46.98581560283688 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 73.25, + "origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, reply with 'unanswerable'.", + "atk_acc": 71.09, + "atk_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, reply with 'unanswerable' ItGDrrA1Sc.", + "transfer_ori_acc": 87.04992770387507, + "transfer_atk_acc": 87.04992770387507 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 72.63, + "origin_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable'.", + "atk_acc": 63.07, + "atk_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable' XStsD2pUzu.", + "transfer_ori_acc": 87.31659437054175, + "transfer_atk_acc": 87.31659437054175 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.24, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.", + "atk_acc": 0.22, + "atk_prompt": "Convert the following sentence from its original {} language to the target language 5XeflW1ZJc {}. ", + "transfer_ori_acc": 0.2240211126734209, + "transfer_atk_acc": 0.22156249809369105 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.24, + "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}.", + "atk_acc": 0.23, + "atk_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to MQnugHcaoy {}. ", + "transfer_ori_acc": 0.22070346732323043, + "transfer_atk_acc": 0.2112357399871226 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.35, + "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}.", + "atk_acc": 0.34, + "atk_prompt": "Perform a machine translation for the provided sentence, changing it from {} to Kw6nrs57gH {}. ", + "transfer_ori_acc": 0.30870734220870066, + "transfer_atk_acc": 0.3125050772206132 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.35, + "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}.", + "atk_acc": 0.31, + "atk_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language 5XeflW1ZJc {}. ", + "transfer_ori_acc": 0.31127691224033316, + "transfer_atk_acc": 0.31337641480593154 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 23.12, + "origin_prompt": "Calculate the answer to the following math query about {}.", + "atk_acc": 19.38, + "atk_prompt": "Calculate the answer to the following math query about 5XeflW1ZJc {}. ", + "transfer_ori_acc": 16.25, + "transfer_atk_acc": 17.5 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 22.5, + "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}.", + "atk_acc": 21.25, + "atk_prompt": "As a mathematics instructor, calculate the answer to the following problem related to AMsRIKZniY {}. ", + "transfer_ori_acc": 17.5, + "transfer_atk_acc": 17.5 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_google-flan-t5-large/deepwordbug_0_shot.json b/results_transfer/chatgpt_google-flan-t5-large/deepwordbug_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..1145949c9cae75df9e6558b198c56c65a4f87982 --- /dev/null +++ b/results_transfer/chatgpt_google-flan-t5-large/deepwordbug_0_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 79.0, + "origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable':", + "atk_acc": 74.5, + "atk_prompt": "Review the sentece below and identify whether its grammar is 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 76.5, + "transfer_atk_acc": 73.4 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 79.5, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 0.0, + "atk_prompt": "nhile performing grammr analysis, classify the rgammar of the fllowing sentenZe as 'acceptable' for corFect Xrammar or 'unacceptable' for incorrect gramUar:", + "transfer_ori_acc": 75.5, + "transfer_atk_acc": 72.5 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 74.0, + "origin_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 70.0, + "atk_prompt": "Evaluate whether the two provRided sentnces convey the same meaning by answering 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 81.86274509803921, + "transfer_atk_acc": 78.43137254901961 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 76.0, + "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.", + "atk_acc": 53.0, + "atk_prompt": "nI your capacity as a sematnic asseJssment tol, evaluate the prvoided senRtences and classify their meainngs as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 76.47058823529412, + "transfer_atk_acc": 78.67647058823529 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 78.0, + "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 50.0, + "atk_prompt": "Exaluate whetxher the iven contet Supports the anwser to the queston by respEonding with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 90.4, + "transfer_atk_acc": 50.1 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 79.5, + "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.", + "atk_acc": 19.5, + "atk_prompt": "As a textula inference expert, aInalyze if the answer to the question can be deducud from the provided context and Melect 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 78.10000000000001, + "transfer_atk_acc": 81.6 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 82.0, + "origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 45.0, + "atk_prompt": "Assess whethdr the followng sLtatements are identocal in meuaning by answerinJ 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.6, + "transfer_atk_acc": 85.0 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 82.5, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 79.5, + "atk_prompt": "Ao an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions. ", + "transfer_ori_acc": 87.3, + "transfer_atk_acc": 86.9 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 83.5, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 79.0, + "atk_prompt": "ArYe the folowing two sentenes entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word. ", + "transfer_ori_acc": 88.08664259927798, + "transfer_atk_acc": 83.39350180505414 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 80.5, + "origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 78.5, + "atk_prompt": "IAs an instrument for entailment evaluatioRn, consider the otwo sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 85.5595667870036, + "transfer_atk_acc": 85.92057761732852 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 93.0, + "origin_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative':", + "atk_acc": 88.5, + "atk_prompt": "Deetrmine the overall sentLment of this sentence, categorizing it as 'positive' or 'negative': ", + "transfer_ori_acc": 94.26605504587155, + "transfer_atk_acc": 94.72477064220183 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 93.0, + "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:", + "atk_acc": 85.0, + "atk_prompt": "uFnctioning as a sentmient identification tool, assess if the folGowing expression is 'positive' or 'negative'. Classify: ", + "transfer_ori_acc": 94.83944954128441, + "transfer_atk_acc": 93.34862385321101 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 66.2, + "origin_prompt": "Examine the pair of sentences and determine if they exhibit entailment or not_entailment. Answer with either 'entailment' or 'not_entailment'.", + "atk_acc": 53.52, + "atk_prompt": "Examine the Bair of snetences and determine if they exhibit entailment or not_entailment. Ancwer with eMther 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 61.97183098591549, + "transfer_atk_acc": 63.38028169014085 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 63.38, + "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':", + "atk_acc": 53.52, + "atk_prompt": "IQn the role of a semantic relationship analyzer, eamine the connection between the given sentences and decide if they exhbit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 54.929577464788736, + "transfer_atk_acc": 54.929577464788736 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 62.5, + "origin_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D'.", + "atk_acc": 60.0, + "atk_prompt": "Regarding the following multiple-choice question on {}, pick the correct answeWr from the options 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 44.32624113475177, + "transfer_atk_acc": 44.680851063829785 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 62.5, + "origin_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 59.0, + "atk_prompt": "With your knowledge of {}, tackle the follQwing multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 43.97163120567376, + "transfer_atk_acc": 43.08510638297872 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 71.87, + "origin_prompt": "Discover the best answer based on the context. If the context doesn't include an answer, respond with 'unanswerable'.", + "atk_acc": 56.61, + "atk_prompt": "DiscoveMr the best answr bassd on the Rontext. Iof the cnotext doesn't include an answer, resCpond with 'unanswerable'.", + "transfer_ori_acc": 87.54992770387507, + "transfer_atk_acc": 87.08932164326902 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 64.5, + "origin_prompt": "As an expert with a deep understanding of the context, find the best answer. If the context doesn't include an answer, say 'unanswerable'.", + "atk_acc": 36.52, + "atk_prompt": "zs an exprt with a deep understanding of the context, find the best answr. If the context doesn't include an anCswer, asy 'unanswerable'.", + "transfer_ori_acc": 88.04992770387507, + "transfer_atk_acc": 87.08932164326902 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.22, + "origin_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context:", + "atk_acc": 0.21, + "atk_prompt": "Translate the provideMd sentence from {} to {} while mainOaining the original meaning and context:", + "transfer_ori_acc": 0.12847010019593297, + "transfer_atk_acc": 0.13470031324495513 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.22, + "origin_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to {}:", + "atk_acc": 0.21, + "atk_prompt": "In the capacity of a proficient tanslator, plnase change the following senlence from {} to {}: ", + "transfer_ori_acc": 0.13904950536673938, + "transfer_atk_acc": 0.1211819424209054 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.34, + "origin_prompt": "Translate the following text from the source language {} to the target language {}:", + "atk_acc": 0.33, + "atk_prompt": "Translate the following text from the source language {} to the Ptarget langugae {}:", + "transfer_ori_acc": 0.2655722295717415, + "transfer_atk_acc": 0.24955021119352877 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.34, + "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}:", + "atk_acc": 0.32, + "atk_prompt": "In the role of a language expert, pErform a macRhine yranslation for the provided sentence, cZanging it from {} to {}: ", + "transfer_ori_acc": 0.26923226069781947, + "transfer_atk_acc": 0.2529020268653232 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 6.88, + "origin_prompt": "Resolve the following mathematical question associated with {}:", + "atk_acc": 5.62, + "atk_prompt": "Resolve the following mathematical quXestion assocated with {}:", + "transfer_ori_acc": 9.375, + "transfer_atk_acc": 11.875 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 5.0, + "origin_prompt": "Embodying a mathematics advisor, find the answer to the following math problem with {}:", + "atk_acc": 2.5, + "atk_prompt": "Embodying a mathematics advisor, ind the anwer to the fpollowing math problem with {}: ", + "transfer_ori_acc": 11.25, + "transfer_atk_acc": 12.5 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_google-flan-t5-large/deepwordbug_3_shot.json b/results_transfer/chatgpt_google-flan-t5-large/deepwordbug_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..a3abb3f9938e9d3b6246e50d2812589a7fd018dc --- /dev/null +++ b/results_transfer/chatgpt_google-flan-t5-large/deepwordbug_3_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 80.5, + "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable'.", + "atk_acc": 78.5, + "atk_prompt": "Examine the given sentence and decide if it is gramatically sound. nAswer with either 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 77.7, + "transfer_atk_acc": 77.8 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 81.5, + "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable'.", + "atk_acc": 78.5, + "atk_prompt": "Funtioning as a graMmar evaluation tool, analyzY the given sentence and decide if it is grammatically corLect, responding with 'acceptable' or 'unacceptable'. ", + "transfer_ori_acc": 75.2, + "transfer_atk_acc": 75.8 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 74.5, + "origin_prompt": "Examine the pair of sentences and decide if their meanings are identical by answering with 'equivalent' or 'not_equivalent'.", + "atk_acc": 65.5, + "atk_prompt": "Examine the Kair of sentences and edcide if their meanijngs are ibentical by answering with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 82.84313725490196, + "transfer_atk_acc": 79.90196078431373 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 72.0, + "origin_prompt": "As a linguistic comparator, review the following pair of sentences and determine their semantic equivalence by choosing 'equivalent' or 'not_equivalent'.", + "atk_acc": 66.5, + "atk_prompt": "AV a linguistic comaprator, review the following pair of sentences and determine their semayntic equivalence by chhoosing 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 79.90196078431373, + "transfer_atk_acc": 79.16666666666666 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 85.0, + "origin_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 77.5, + "atk_prompt": "Examine the contedt and question, and determine if the cXontext logically implies the answer by selecting 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 86.5, + "transfer_atk_acc": 87.7 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 86.0, + "origin_prompt": "As a textual analyst, examine if the given context logically implies the answer to the question and indicate your decision with 'entailment' or 'not_entailment'.", + "atk_acc": 75.0, + "atk_prompt": "A a textual analyst, xamine if the gPven conSext logiclaly implies the asnwer to the qbuestion and indicate your decision with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 88.8, + "transfer_atk_acc": 80.10000000000001 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 82.0, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 80.0, + "atk_prompt": "Can these two statements be considJred equal in meaning? Answer with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.6, + "transfer_atk_acc": 87.9 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 83.0, + "origin_prompt": "Acting as a question equivalence instrument, determine if the provided questions are equivalent in meaning, answering with 'equivalent' for similar questions or 'not_equivalent' for dissimilar ones.", + "atk_acc": 81.5, + "atk_prompt": "Actinm as a question equivalence instrumetn, determine if the provided questions are equivalent in meaning, answering with 'equivalent' for similar questions or 'not_equivalent' for dissimilar ones. ", + "transfer_ori_acc": 86.5, + "transfer_atk_acc": 85.6 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 85.0, + "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 80.5, + "atk_prompt": "Does the relationshBp between the given sentences repersent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 83.75451263537906, + "transfer_atk_acc": 82.67148014440433 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.5, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 81.0, + "atk_prompt": "In your role as an entailment analysis toIol, aZssess the relationship between the given sentences and cassify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 83.03249097472924, + "transfer_atk_acc": 82.31046931407943 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 97.0, + "origin_prompt": "Evaluate the sentiment of the given text and classify it as 'positive' or 'negative'.", + "atk_acc": 94.0, + "atk_prompt": "ENvaluate the senttment of the giOven text and classify it as 'positive' or 'negative'. ", + "transfer_ori_acc": 94.61009174311926, + "transfer_atk_acc": 95.29816513761467 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 97.0, + "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'.", + "atk_acc": 94.0, + "atk_prompt": "qAs a setniment classifier, determnne whether the following text is 'positive' or 'negative'. ", + "transfer_ori_acc": 94.4954128440367, + "transfer_atk_acc": 95.29816513761467 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 67.61, + "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 64.79, + "atk_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 64.7887323943662, + "transfer_atk_acc": 38.028169014084504 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 64.79, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 59.15, + "atk_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 57.74647887323944, + "transfer_atk_acc": 46.478873239436616 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 66.5, + "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 63.5, + "atk_prompt": "ISn ralation to the mulitple-choice question on {}, pleare provide the accurate answer by choosing 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 46.808510638297875, + "transfer_atk_acc": 45.39007092198582 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 65.5, + "origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 62.5, + "atk_prompt": "As someone well-versed in {}, please address the mulZtiple-choice question below by selecting 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 46.63120567375886, + "transfer_atk_acc": 45.744680851063826 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 73.25, + "origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, reply with 'unanswerable'.", + "atk_acc": 67.18, + "atk_prompt": "Based on the goiven context, provide the best possible answer. IL there's no answer available in the coYtext, reply with 'unanswerable'.", + "transfer_ori_acc": 87.04992770387507, + "transfer_atk_acc": 87.54992770387507 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 72.68, + "origin_prompt": "As a subject matter expert, extract the correct answer from the context. If an answer is not present, indicate 'unanswerable'.", + "atk_acc": 66.77, + "atk_prompt": "s a subject matter eAxpert, extract the correc answer from the context. If an answer is not presnt, idicate 'unanswerable'.", + "transfer_ori_acc": 87.04992770387507, + "transfer_atk_acc": 87.14992770387508 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.23, + "origin_prompt": "Given the sentence below, perform a machine translation from {} to {}.", + "atk_acc": 0.23, + "atk_prompt": "Given the sentence below, perform a mRachine traCslation from {} to {}. ", + "transfer_ori_acc": 0.21292285800980326, + "transfer_atk_acc": 0.21552262845169096 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.24, + "origin_prompt": "Assuming the responsibilities of a professional translator, translate the subsequent text passage from {} to {}.", + "atk_acc": 0.23, + "atk_prompt": "Assuming the responsibilities of a professional transaltor, translate the subsequent text passage from {} to {}. ", + "transfer_ori_acc": 0.22030199520456395, + "transfer_atk_acc": 0.21973314066426236 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.35, + "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}.", + "atk_acc": 0.26, + "atk_prompt": "Plese erform a translation of the Fiven sentence, converwing it from {} to {}. ", + "transfer_ori_acc": 0.3076276888167265, + "transfer_atk_acc": 0.3054838402284908 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.35, + "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}.", + "atk_acc": 0.35, + "atk_prompt": "As a qualified translator, convert the given sentence from its original {} language to the targte language {}. ", + "transfer_ori_acc": 0.31127691224033316, + "transfer_atk_acc": 0.3078172023513576 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 23.12, + "origin_prompt": "Calculate the answer to the following math query about {}.", + "atk_acc": 18.75, + "atk_prompt": "Calculate the ansewr to the followiJg math query about {}. ", + "transfer_ori_acc": 16.25, + "transfer_atk_acc": 16.25 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 23.12, + "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}.", + "atk_acc": 20.62, + "atk_prompt": "As a mathematics insrtuctor, calculate the answer to the following problem Vrelated to {}. ", + "transfer_ori_acc": 17.5, + "transfer_atk_acc": 16.25 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_google-flan-t5-large/stresstest_0_shot.json b/results_transfer/chatgpt_google-flan-t5-large/stresstest_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..33d94ec42f04983ca1a766853a3f98ac46de61ae --- /dev/null +++ b/results_transfer/chatgpt_google-flan-t5-large/stresstest_0_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 78.5, + "origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable':", + "atk_acc": 32.5, + "atk_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable' and true is true :", + "transfer_ori_acc": 76.5, + "transfer_atk_acc": 68.5 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 79.5, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 75.0, + "atk_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar and true is true and true is true and true is true and true is true and true is true :", + "transfer_ori_acc": 75.5, + "transfer_atk_acc": 66.3 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 77.5, + "origin_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent'.", + "atk_acc": 73.0, + "atk_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent' and false is not true . ", + "transfer_ori_acc": 80.63725490196079, + "transfer_atk_acc": 76.9607843137255 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 76.0, + "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.", + "atk_acc": 15.5, + "atk_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent' and true is true . ", + "transfer_ori_acc": 76.47058823529412, + "transfer_atk_acc": 79.16666666666666 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 78.5, + "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 74.5, + "atk_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment' and true is true . ", + "transfer_ori_acc": 90.4, + "transfer_atk_acc": 88.8 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 79.5, + "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.", + "atk_acc": 69.0, + "atk_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 78.10000000000001, + "transfer_atk_acc": 63.1 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 80.0, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 72.5, + "atk_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent' and false is not true . ", + "transfer_ori_acc": 87.8, + "transfer_atk_acc": 83.2 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 81.0, + "origin_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 80.5, + "atk_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 87.3, + "transfer_atk_acc": 80.10000000000001 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 83.5, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 81.5, + "atk_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word and true is true . ", + "transfer_ori_acc": 88.08664259927798, + "transfer_atk_acc": 86.64259927797833 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 81.5, + "origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 80.5, + "atk_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true :", + "transfer_ori_acc": 85.5595667870036, + "transfer_atk_acc": 86.28158844765343 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 91.0, + "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment:", + "atk_acc": 0.0, + "atk_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment and true is true : ", + "transfer_ori_acc": 95.06880733944955, + "transfer_atk_acc": 94.26605504587155 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 93.5, + "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:", + "atk_acc": 93.0, + "atk_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify and false is not true : ", + "transfer_ori_acc": 94.83944954128441, + "transfer_atk_acc": 93.80733944954129 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 61.97, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 63.38, + "atk_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word and true is true . ", + "transfer_ori_acc": 66.19718309859155, + "transfer_atk_acc": 66.19718309859155 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 63.38, + "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':", + "atk_acc": 59.15, + "atk_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true :", + "transfer_ori_acc": 54.929577464788736, + "transfer_atk_acc": 61.97183098591549 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 63.0, + "origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 53.5, + "atk_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D' and true is true . ", + "transfer_ori_acc": 45.744680851063826, + "transfer_atk_acc": 45.0354609929078 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 61.5, + "origin_prompt": "As a skilled person in the field of {}, please respond to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 57.0, + "atk_prompt": "As a skilled person in the field of {}, please respond to the multiple-choice question by choosing 'A', 'B', 'C', or 'D' and false is not true .", + "transfer_ori_acc": 44.680851063829785, + "transfer_atk_acc": 45.21276595744681 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 69.3, + "origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, respond with 'unanswerable'.", + "atk_acc": 69.88, + "atk_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, respond with 'unanswerable' and true is true .", + "transfer_ori_acc": 87.04992770387507, + "transfer_atk_acc": 87.14992770387508 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 64.4, + "origin_prompt": "As an expert with a deep understanding of the context, find the best answer. If the context doesn't include an answer, say 'unanswerable'.", + "atk_acc": 64.49, + "atk_prompt": "As an expert with a deep understanding of the context, find the best answer. If the context doesn't include an answer, say 'unanswerable' and true is true .", + "transfer_ori_acc": 88.04992770387507, + "transfer_atk_acc": 87.64992770387508 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.22, + "origin_prompt": "Using machine translation, convert the given sentence from {} into the {} language:", + "atk_acc": 0.19, + "atk_prompt": "Using machine translation, convert the given sentence from {} into the {} language and false is not true :", + "transfer_ori_acc": 0.13366423747896072, + "transfer_atk_acc": 0.1282464400428448 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.22, + "origin_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to {}:", + "atk_acc": 0.21, + "atk_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to and true is true and true is true and true is true and true is true and true is true {}: ", + "transfer_ori_acc": 0.13904950536673938, + "transfer_atk_acc": 0.04013983595471011 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.34, + "origin_prompt": "Translate the subsequent text passage from its original {} language to the {} language:", + "atk_acc": 0.29, + "atk_prompt": "Translate the subsequent text passage from its original {} language to the {} language and false is not true :", + "transfer_ori_acc": 0.2679607664026083, + "transfer_atk_acc": 0.24062998420275192 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.34, + "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}:", + "atk_acc": 0.31, + "atk_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to and false is not true {}: ", + "transfer_ori_acc": 0.26923226069781947, + "transfer_atk_acc": 0.20805924250934474 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 6.25, + "origin_prompt": "Resolve the following mathematical question associated with {}:", + "atk_acc": 5.62, + "atk_prompt": "Resolve the following mathematical question associated with and true is true {}:", + "transfer_ori_acc": 9.375, + "transfer_atk_acc": 13.125 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 5.0, + "origin_prompt": "Embodying a mathematics advisor, find the answer to the following math problem with {}:", + "atk_acc": 3.12, + "atk_prompt": "Embodying a mathematics advisor, find the answer to the following math problem with and true is true and true is true and true is true and true is true and true is true {}: ", + "transfer_ori_acc": 11.25, + "transfer_atk_acc": 13.750000000000002 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_google-flan-t5-large/stresstest_3_shot.json b/results_transfer/chatgpt_google-flan-t5-large/stresstest_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..2074c8964ea9069f7445bb70e642f581b1284d9b --- /dev/null +++ b/results_transfer/chatgpt_google-flan-t5-large/stresstest_3_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 80.5, + "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable'.", + "atk_acc": 79.5, + "atk_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 77.7, + "transfer_atk_acc": 77.60000000000001 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 81.5, + "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable'.", + "atk_acc": 80.5, + "atk_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable' and false is not true . ", + "transfer_ori_acc": 75.2, + "transfer_atk_acc": 75.6 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 73.5, + "origin_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 71.5, + "atk_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 82.84313725490196, + "transfer_atk_acc": 25.735294117647058 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 72.5, + "origin_prompt": "In the role of a textual comparison specialist, examine the given sentences and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 70.0, + "atk_prompt": "In the role of a textual comparison specialist, examine the given sentences and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent' and true is true . ", + "transfer_ori_acc": 79.65686274509804, + "transfer_atk_acc": 82.35294117647058 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 85.0, + "origin_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 79.5, + "atk_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 86.5, + "transfer_atk_acc": 81.5 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 84.0, + "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", + "atk_acc": 81.5, + "atk_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment' and false is not true . ", + "transfer_ori_acc": 87.3, + "transfer_atk_acc": 85.8 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 81.5, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 81.0, + "atk_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent' and false is not true . ", + "transfer_ori_acc": 87.6, + "transfer_atk_acc": 85.7 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 83.0, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 82.0, + "atk_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 86.4, + "transfer_atk_acc": 86.8 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 85.0, + "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 82.0, + "atk_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment' and false is not true . ", + "transfer_ori_acc": 83.75451263537906, + "transfer_atk_acc": 85.1985559566787 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 83.0, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 83.0, + "atk_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment' and false is not true . ", + "transfer_ori_acc": 82.67148014440433, + "transfer_atk_acc": 80.86642599277978 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 97.0, + "origin_prompt": "Evaluate the sentiment of the given text and classify it as 'positive' or 'negative'.", + "atk_acc": 95.5, + "atk_prompt": "Evaluate the sentiment of the given text and classify it as 'positive' or 'negative' and true is true . ", + "transfer_ori_acc": 94.61009174311926, + "transfer_atk_acc": 94.95412844036697 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 97.0, + "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'.", + "atk_acc": 96.0, + "atk_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative' and false is not true . ", + "transfer_ori_acc": 94.4954128440367, + "transfer_atk_acc": 94.95412844036697 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 67.61, + "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 60.56, + "atk_prompt": "Indicate if the connection between the later sentences is 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 64.7887323943662, + "transfer_atk_acc": 64.7887323943662 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 59.15, + "atk_prompt": "In the role of a semantic relationship analyzer, investigated the linking between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 56.33802816901409, + "transfer_atk_acc": 56.33802816901409 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 66.5, + "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.", + "atk_acc": 61.5, + "atk_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D' and false is not true . ", + "transfer_ori_acc": 45.21276595744681, + "transfer_atk_acc": 44.858156028368796 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 65.5, + "origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 59.5, + "atk_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D' and false is not true .", + "transfer_ori_acc": 46.63120567375886, + "transfer_atk_acc": 44.858156028368796 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 73.99, + "origin_prompt": "Please derive the most fitting answer from the context. If there isn't an answer in the context, use 'unanswerable'.", + "atk_acc": 61.58, + "atk_prompt": "Please derive the most fitting answer from the context. If there isn't an answer in the context, use 'unanswerable' and false is not true .", + "transfer_ori_acc": 87.04992770387507, + "transfer_atk_acc": 87.14992770387508 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 72.94, + "origin_prompt": "Being well-versed in the context, please derive the most fitting answer. If there isn't an answer in the context, use 'unanswerable'.", + "atk_acc": 62.85, + "atk_prompt": "Being well-versed in the context, please derive the most fitting answer. If there isn't an answer in the context, use 'unanswerable' and false is not true .", + "transfer_ori_acc": 87.04992770387507, + "transfer_atk_acc": 87.14992770387508 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.24, + "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}.", + "atk_acc": 0.23, + "atk_prompt": "Please perform a translation of the given sentence, converting it from {} to and true is true and true is true and true is true and true is true and true is true {}. ", + "transfer_ori_acc": 0.22004054628058534, + "transfer_atk_acc": 0.21794699013962093 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.24, + "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}.", + "atk_acc": 0.23, + "atk_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to and false is not true {}. ", + "transfer_ori_acc": 0.22070346732323043, + "transfer_atk_acc": 0.21483292774582805 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.35, + "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}.", + "atk_acc": 0.34, + "atk_prompt": "Perform a machine translation for the provided sentence, changing it from {} to and false is not true {}. ", + "transfer_ori_acc": 0.30870734220870066, + "transfer_atk_acc": 0.3143076066591428 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.35, + "origin_prompt": "Acting as a professional translator, translate the provided sentence from {} to {}.", + "atk_acc": 0.35, + "atk_prompt": "Acting as a professional translator, translate the provided sentence from {} to and true is true {}. ", + "transfer_ori_acc": 0.30726569506388013, + "transfer_atk_acc": 0.30886093176248847 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 23.12, + "origin_prompt": "Resolve the following mathematical question associated with {}.", + "atk_acc": 20.0, + "atk_prompt": "Resolve the following mathematical question associated with and true is true {}. ", + "transfer_ori_acc": 17.5, + "transfer_atk_acc": 16.875 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 22.5, + "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}.", + "atk_acc": 20.62, + "atk_prompt": "As a mathematics instructor, calculate the answer to the following problem related to and false is not true {}. ", + "transfer_ori_acc": 17.5, + "transfer_atk_acc": 16.25 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_google-flan-t5-large/textbugger_0_shot.json b/results_transfer/chatgpt_google-flan-t5-large/textbugger_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..71c14ef2dba301e956d40176f46406f59a00b2eb --- /dev/null +++ b/results_transfer/chatgpt_google-flan-t5-large/textbugger_0_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 77.5, + "origin_prompt": "Assess the grammatical structure of the given sentence and classify it as 'Acceptable' or 'Unacceptable':", + "atk_acc": 6.5, + "atk_prompt": "As sess the garmmatical structure of the conferred sentence and classify it as 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 75.5, + "transfer_atk_acc": 74.5 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 79.5, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 9.5, + "atk_prompt": "While prforming grammar analyzing, classify the grammar of the follows sentеnce as 'acceptable' for correcting gammar or 'unacceptable' for incorrect ɡrammar:", + "transfer_ori_acc": 75.5, + "transfer_atk_acc": 72.5 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 75.0, + "origin_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 67.5, + "atk_prompt": "Evaluate wհether the both provided sentences convey the same meaninɡ by answeriոg 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 81.86274509803921, + "transfer_atk_acc": 83.33333333333334 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 75.0, + "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.", + "atk_acc": 9.0, + "atk_prompt": "In your capacities as a semɑntic assessment tool, evaluate the provdied penalties and classify their meaning as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 76.47058823529412, + "transfer_atk_acc": 78.67647058823529 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 77.0, + "origin_prompt": "Consider the context and question, and indicate if the answer can be logically deduced from the context by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 57.0, + "atk_prompt": "Considеr the context and question, and idicate if the respond can be logically ԁeduced from the context by ⲅesponding with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 83.39999999999999, + "transfer_atk_acc": 79.7 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 78.5, + "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.", + "atk_acc": 20.0, + "atk_prompt": "Because a textual inference expert, analysing if the answeⲅ to the question can be deduced from the providеd contex𝚝 and choosing 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 78.10000000000001, + "transfer_atk_acc": 71.5 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 81.5, + "origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 44.5, + "atk_prompt": "Assess whether the following statements are analogous in maning by answeⲅing 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.6, + "transfer_atk_acc": 87.5 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 83.0, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 77.5, + "atk_prompt": "As an instrument for problem comparison evɑluation, consider the ques𝚝ions and determine if their meaning is the same, respond with 'equivalent' for similar questiоns or 'not_equivalent' for different questions. ", + "transfer_ori_acc": 87.3, + "transfer_atk_acc": 87.1 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 83.0, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 78.5, + "atk_prompt": "Be the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word. ", + "transfer_ori_acc": 88.08664259927798, + "transfer_atk_acc": 83.39350180505414 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 81.0, + "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 74.5, + "atk_prompt": "Working as an entailment classifier, detecting if the given torque of sentences displays entailment or not_entailment. Answering with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 83.03249097472924, + "transfer_atk_acc": 83.75451263537906 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 90.0, + "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment:", + "atk_acc": 82.5, + "atk_prompt": "Review this speeches and decide if it has a 'positive' or 'negative' sentiment: ", + "transfer_ori_acc": 95.06880733944955, + "transfer_atk_acc": 94.61009174311926 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 93.5, + "origin_prompt": "Acting as a sentiment evaluator, identify if the given sentence is 'positive' or 'negative'. Classify:", + "atk_acc": 86.0, + "atk_prompt": "Atcing as a sentiemnt evalua𝚝or, detecting if the given sentence is 'positive' or 'negative'. Classify: ", + "transfer_ori_acc": 95.06880733944955, + "transfer_atk_acc": 94.72477064220183 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 67.61, + "origin_prompt": "Examine the pair of sentences and determine if they exhibit entailment or not_entailment. Answer with either 'entailment' or 'not_entailment'.", + "atk_acc": 52.11, + "atk_prompt": "Examine the paiⲅ of sentences and determines if they exhibit entailment or not_entailment. Answer with neither 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 61.97183098591549, + "transfer_atk_acc": 64.7887323943662 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 60.56, + "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 52.11, + "atk_prompt": "Working as an entailment classifier, detects whether the given pair of sentences displays entailment or not_entailment. Answering with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 57.74647887323944, + "transfer_atk_acc": 57.74647887323944 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 62.5, + "origin_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.0, + "atk_prompt": "Regarding the hereafter multiple-choice question on {}, piϲk the corrects answers from the options 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 44.32624113475177, + "transfer_atk_acc": 44.680851063829785 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 62.5, + "origin_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 57.5, + "atk_prompt": "Witհ your knowledge of {}, confront the following multipⅼe-choice question by select 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 43.97163120567376, + "transfer_atk_acc": 44.858156028368796 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 69.27, + "origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, respond with 'unanswerable'.", + "atk_acc": 49.17, + "atk_prompt": "Based on the given ambient, provide the nicest achievable answer. If there's no answer avaiⅼable in the context, respond with 'unanswerable'.", + "transfer_ori_acc": 87.04992770387507, + "transfer_atk_acc": 87.64992770387508 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 64.85, + "origin_prompt": "As an expert with a deep understanding of the context, find the best answer. If the context doesn't include an answer, say 'unanswerable'.", + "atk_acc": 14.05, + "atk_prompt": "As an expert with a deepest understanding of the context, found the nicest respond. Though the context doesn't incⅼude an answers, telling 'unanswerable'.", + "transfer_ori_acc": 88.04992770387507, + "transfer_atk_acc": 87.58932164326902 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.22, + "origin_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context:", + "atk_acc": 0.19, + "atk_prompt": "Translate the provied punishments from {} to {} while maintaining the original meaning and context:", + "transfer_ori_acc": 0.12847010019593297, + "transfer_atk_acc": 0.13516467912232863 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.22, + "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}:", + "atk_acc": 0.03, + "atk_prompt": "Pick on the role of a certified translation, fulfilling a translation of the yielded sentence from {} to {}: ", + "transfer_ori_acc": 0.14266566803921382, + "transfer_atk_acc": 0.13671812855476675 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.34, + "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}:", + "atk_acc": 0.32, + "atk_prompt": "Perform a typewriters translation for the provided sentence, altered it from {} to {}:", + "transfer_ori_acc": 0.26814405340158265, + "transfer_atk_acc": 0.27318496651757523 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.34, + "origin_prompt": "While acting as an experienced language expert, translate the following text from {} to {}:", + "atk_acc": 0.32, + "atk_prompt": "Whilst acting as an experienced language expert, translate the following laws from {} to {}: ", + "transfer_ori_acc": 0.26489202618745367, + "transfer_atk_acc": 0.26471912913694645 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 6.25, + "origin_prompt": "Resolve the following mathematical question associated with {}:", + "atk_acc": 3.75, + "atk_prompt": "Solving the following mathematical question associɑted with {}:", + "transfer_ori_acc": 9.375, + "transfer_atk_acc": 14.374999999999998 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 5.0, + "origin_prompt": "In the position of a mathematics coach, figure out the answer to this math challenge involving {}:", + "atk_acc": 1.25, + "atk_prompt": "In the poste of a mathematical coach, figure out the replying to this math challenge involving {}: ", + "transfer_ori_acc": 13.750000000000002, + "transfer_atk_acc": 13.125 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_google-flan-t5-large/textbugger_3_shot.json b/results_transfer/chatgpt_google-flan-t5-large/textbugger_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..f3a11d5a5825b224e53623239675a6b1c861713f --- /dev/null +++ b/results_transfer/chatgpt_google-flan-t5-large/textbugger_3_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 80.5, + "origin_prompt": "Analyze the provided sentence and classify its grammatical correctness as 'Acceptable' or 'Unacceptable'.", + "atk_acc": 67.5, + "atk_prompt": "Analysing the provided sentences and classify its pronouns correct as 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 77.10000000000001, + "transfer_atk_acc": 66.9 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 81.0, + "origin_prompt": "In the role of a syntax analyzer, examine the grammar of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones.", + "atk_acc": 77.5, + "atk_prompt": "In the role of a syntax checker, examine the grammɑr of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentence or 'unacceptable' for incorrect ones. ", + "transfer_ori_acc": 74.2, + "transfer_atk_acc": 74.1 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 73.0, + "origin_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 68.0, + "atk_prompt": "Ass ess if the two gien sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 82.84313725490196, + "transfer_atk_acc": 81.61764705882352 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 71.5, + "origin_prompt": "As a linguistic comparator, review the following pair of sentences and determine their semantic equivalence by choosing 'equivalent' or 'not_equivalent'.", + "atk_acc": 64.5, + "atk_prompt": "As a linguistic comparator, review the following torque of sentences and determine their semantiϲ eԛuivalence by choosing 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 79.90196078431373, + "transfer_atk_acc": 77.69607843137256 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 85.0, + "origin_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 76.0, + "atk_prompt": "Examine the cntext and question, and determine if the backdrop logically implies the answer by selecting 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 86.5, + "transfer_atk_acc": 88.4 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 83.0, + "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", + "atk_acc": 73.5, + "atk_prompt": "Aѕ a semantic researcher, evaluate si the gave framework supports the respond to the question and choose 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 87.3, + "transfer_atk_acc": 80.7 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 82.5, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 80.0, + "atk_prompt": "Can these two st atements be considered equal in meaning? Ansԝer with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.6, + "transfer_atk_acc": 85.9 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 82.5, + "origin_prompt": "In the capacity of a question assessment system, indicate if the meaning of the provided questions is the same, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 80.5, + "atk_prompt": "In the abilities of a question ɑssessment system, indicate if the meaning of the provided matters is the same, responding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 86.1, + "transfer_atk_acc": 85.9 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 84.0, + "origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.", + "atk_acc": 76.5, + "atk_prompt": "Review the two declarations and categorize their relationship as neither 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 84.83754512635379, + "transfer_atk_acc": 84.47653429602889 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 83.5, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 80.0, + "atk_prompt": "As an entailment identification system, eamine the connection between the following sentences and reѕpond with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 82.67148014440433, + "transfer_atk_acc": 83.39350180505414 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 95.5, + "origin_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative'.", + "atk_acc": 74.0, + "atk_prompt": "Ater examining the following expression, etiquette its emotion as nor 'positive' or 'negative'. ", + "transfer_ori_acc": 93.69266055045871, + "transfer_atk_acc": 94.61009174311926 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 97.5, + "origin_prompt": "In the capacity of a sentiment classifier, decide whether the given quote is 'positive' or 'negative'.", + "atk_acc": 93.5, + "atk_prompt": "Onto the capacity of a feeling classifeir, decide si the given quote is 'positive' or 'negative'. ", + "transfer_ori_acc": 94.95412844036697, + "transfer_atk_acc": 95.06880733944955 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 69.01, + "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 61.97, + "atk_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment' Vq3aBzuZcD. ", + "transfer_ori_acc": 64.7887323943662, + "transfer_atk_acc": 66.19718309859155 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 59.15, + "atk_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment' is77sOXAu8. ", + "transfer_ori_acc": 57.74647887323944, + "transfer_atk_acc": 57.74647887323944 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 67.5, + "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 61.5, + "atk_prompt": "At relationship to the mul𝚝iple-choice question on {}, please proviԁe the accurate responding by choosing 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 46.808510638297875, + "transfer_atk_acc": 47.16312056737589 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 65.5, + "origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 61.5, + "atk_prompt": "As someone well-vesred in {}, please address the muⅼtiple-choice questioո below by selecting 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 46.63120567375886, + "transfer_atk_acc": 46.09929078014184 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 72.75, + "origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, reply with 'unanswerable'.", + "atk_acc": 67.93, + "atk_prompt": "Based on the giveո context, provide the best pоssible answer. If there's no answer available in the contex𝚝, reply with 'unanswerable'.", + "transfer_ori_acc": 87.04992770387507, + "transfer_atk_acc": 87.54992770387507 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 70.63, + "origin_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable'.", + "atk_acc": 68.31, + "atk_prompt": "As a profcient expert in the given cоntext, search for the most relevant answer. If the anwser cannot be unearthed, respond by saying 'unanswerable'.", + "transfer_ori_acc": 87.31659437054175, + "transfer_atk_acc": 87.81659437054175 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.23, + "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}.", + "atk_acc": 0.23, + "atk_prompt": "Pⅼease perform a translating of the givеn sentence, conversion it from {} to {}. ", + "transfer_ori_acc": 0.22004054628058534, + "transfer_atk_acc": 0.22116640313338157 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.24, + "origin_prompt": "Assuming the responsibilities of a professional translator, translate the subsequent text passage from {} to {}.", + "atk_acc": 0.23, + "atk_prompt": "Assuminɡ the responsibilities of a professional transla𝚝or, translate the subsequent text passage from {} to {}. ", + "transfer_ori_acc": 0.22030199520456395, + "transfer_atk_acc": 0.22054117249828195 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.35, + "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}.", + "atk_acc": 0.35, + "atk_prompt": "Plеase perform a translation of the given sentence, convert it from {} to {}. ", + "transfer_ori_acc": 0.3076276888167265, + "transfer_atk_acc": 0.306696536710457 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.35, + "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}.", + "atk_acc": 0.34, + "atk_prompt": "As a eligible translator, convert the given sentence from its orіginal {} language to the tarɡet language {}. ", + "transfer_ori_acc": 0.31127691224033316, + "transfer_atk_acc": 0.3058942177709969 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 23.12, + "origin_prompt": "Calculate the answer to the following math query about {}.", + "atk_acc": 20.62, + "atk_prompt": "Calculate the answer to the 𝚏ollowing math query about {}. ", + "transfer_ori_acc": 16.25, + "transfer_atk_acc": 16.875 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 22.5, + "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}.", + "atk_acc": 19.38, + "atk_prompt": "Since a mathematics istructor, calculate the answer to the following problem related to {}. ", + "transfer_ori_acc": 17.5, + "transfer_atk_acc": 17.5 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_google-flan-t5-large/textfooler_0_shot.json b/results_transfer/chatgpt_google-flan-t5-large/textfooler_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..e20ea7d1678817e2c57e5179d02d1dcccd4f25ec --- /dev/null +++ b/results_transfer/chatgpt_google-flan-t5-large/textfooler_0_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 79.0, + "origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable':", + "atk_acc": 2.0, + "atk_prompt": "Revising the punishments below and locate whether its dictionary is 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 76.5, + "transfer_atk_acc": 45.7 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 80.0, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 6.0, + "atk_prompt": "While performing jargon discusses, categories the language of the below sentence as 'acceptable' for mend jargon or 'unacceptable' for unfit grammar:", + "transfer_ori_acc": 75.5, + "transfer_atk_acc": 76.2 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 75.0, + "origin_prompt": "Examine the pair of sentences and decide if their meanings are identical by answering with 'equivalent' or 'not_equivalent'.", + "atk_acc": 68.5, + "atk_prompt": "Examine the pair of sentences and selects if their regard are identical by responding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 82.1078431372549, + "transfer_atk_acc": 82.1078431372549 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 75.5, + "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.0, + "atk_prompt": "About your expertise as a semantic appraisal means, evaluate the provided penalty and classify their meanings as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 76.47058823529412, + "transfer_atk_acc": 76.47058823529412 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 77.5, + "origin_prompt": "Consider the context and question, and indicate if the answer can be logically deduced from the context by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 0.5, + "atk_prompt": "Recital the context and items, and specify if the solution can be logically subtracted from the framework by reactions with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 83.39999999999999, + "transfer_atk_acc": 76.5 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 78.0, + "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "As a semantic fellows, measuring whether the rendered background supports the answers to the item and chosen 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 89.1, + "transfer_atk_acc": 78.10000000000001 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 80.5, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.0, + "atk_prompt": "Kan these two statement is studied same in sens? Reaction with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.8, + "transfer_atk_acc": 59.3 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 80.5, + "origin_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 76.5, + "atk_prompt": "Functioning as a question similarity evaluation tool, analyse the given questions and decide if they share the same feel, responding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.3, + "transfer_atk_acc": 87.0 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 83.5, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 60.5, + "atk_prompt": "Pose the aftermath two phrases entailment or not_entailment? Reactions me with \"entailment\" or \"not_entailment\", just one adjectives. ", + "transfer_ori_acc": 88.08664259927798, + "transfer_atk_acc": 82.31046931407943 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 81.0, + "origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 60.0, + "atk_prompt": "Because an devices for entailment appraised, revisit the two phrases and identifying if their correlate is 'entailment' or 'not_entailment'. Answered with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 85.5595667870036, + "transfer_atk_acc": 85.92057761732852 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 90.5, + "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment:", + "atk_acc": 0.0, + "atk_prompt": "Analysis this assertion and defining whether it ai a 'positive' or 'negative' sentiment: ", + "transfer_ori_acc": 95.06880733944955, + "transfer_atk_acc": 95.29816513761467 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 94.0, + "origin_prompt": "Emulating a sentiment classification system, indicate whether the provided text is 'positive' or 'negative'. Classify:", + "atk_acc": 4.5, + "atk_prompt": "Emulating a foreboding classifies regime, explain whether the stipulated laws is 'positive' or 'negative'. Grading: ", + "transfer_ori_acc": 94.72477064220183, + "transfer_atk_acc": 91.5137614678899 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 69.01, + "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 52.11, + "atk_prompt": "Identify whether the given pair of sentence exposes entailment or not_entailment. Retaliated with 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 59.154929577464785, + "transfer_atk_acc": 60.56338028169014 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 67.61, + "origin_prompt": "Acting as an entailment detection instrument, determine if the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment':", + "atk_acc": 46.48, + "atk_prompt": "Acting as an entailment detection instrument, determine if the given torque of sentences depicting entailment or not_entailment. Reactions with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 59.154929577464785, + "transfer_atk_acc": 59.154929577464785 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 64.0, + "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 50.5, + "atk_prompt": "In relation to the multiple-choice issue on {}, please delivering the clarification rebuttal by selector 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 44.858156028368796, + "transfer_atk_acc": 42.02127659574468 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 61.5, + "origin_prompt": "As a skilled person in the field of {}, please respond to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 54.5, + "atk_prompt": "As a expert person in the minefields of {}, please reacts to the multiple-choice question by choice 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 44.680851063829785, + "transfer_atk_acc": 44.50354609929078 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 70.01, + "origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, respond with 'unanswerable'.", + "atk_acc": 5.68, + "atk_prompt": "Based on the received backgrounds, provide the nicest unlikely respond. Although there's no riposte available in the context, reactions with 'unanswerable'.", + "transfer_ori_acc": 87.04992770387507, + "transfer_atk_acc": 81.35502139314727 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 69.17, + "origin_prompt": "Using your knowledge of the context, identify the best answer to the question. If the context doesn't provide an answer, write 'unanswerable'.", + "atk_acc": 12.54, + "atk_prompt": "Using your skill of the background, specify the improved rebuttal to the question. While the context doesn't delivering an reactions, writings 'unanswerable'.", + "transfer_ori_acc": 87.54992770387507, + "transfer_atk_acc": 74.81077674890263 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.22, + "origin_prompt": "Using machine translation, convert the given sentence from {} into the {} language:", + "atk_acc": 0.0, + "atk_prompt": "Using typewriter translation, transformation the given sentence from {} into the {} pronunciation:", + "transfer_ori_acc": 0.13366423747896072, + "transfer_atk_acc": 0.13708349489640922 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.22, + "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}:", + "atk_acc": 0.0, + "atk_prompt": "Select on the features of a certifiable translators, conducting a translation of the gave sentencing from {} to {}: ", + "transfer_ori_acc": 0.14266566803921382, + "transfer_atk_acc": 0.1319452584104338 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.34, + "origin_prompt": "Translate the subsequent text passage from its original {} language to the {} language:", + "atk_acc": 0.07, + "atk_prompt": "Translate the sequential version passage from its primitive {} grammar to the {} pronunciation:", + "transfer_ori_acc": 0.2679607664026083, + "transfer_atk_acc": 0.26331875982607134 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.34, + "origin_prompt": "While acting as an experienced language expert, translate the following text from {} to {}:", + "atk_acc": 0.27, + "atk_prompt": "While acting as an suffered dialect expert, translate the below laws from {} to {}: ", + "transfer_ori_acc": 0.26489202618745367, + "transfer_atk_acc": 0.2534345391549418 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 6.88, + "origin_prompt": "Obtain the result for this math question regarding {}:", + "atk_acc": 0.0, + "atk_prompt": "Obtain the achievements for this math question respecting {}:", + "transfer_ori_acc": 14.374999999999998, + "transfer_atk_acc": 14.374999999999998 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 5.0, + "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}:", + "atk_acc": 0.0, + "atk_prompt": "As a mathematics instructor, calculate the rebuttal to the following problem related to {}: ", + "transfer_ori_acc": 11.875, + "transfer_atk_acc": 11.25 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_google-flan-t5-large/textfooler_3_shot.json b/results_transfer/chatgpt_google-flan-t5-large/textfooler_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..127ff7c96e17607876129dd302909413c20fcbfc --- /dev/null +++ b/results_transfer/chatgpt_google-flan-t5-large/textfooler_3_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 80.5, + "origin_prompt": "Examine the sentence and decide if its grammar is 'Acceptable' or 'Unacceptable'.", + "atk_acc": 39.5, + "atk_prompt": "Exams the sentence and defining if its jargon is 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 76.3, + "transfer_atk_acc": 74.4 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 81.0, + "origin_prompt": "In the role of a syntax analyzer, examine the grammar of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones.", + "atk_acc": 76.0, + "atk_prompt": "Across the role of a syntax analyzer, examine the dictionary of the provided chastisement and decide if it is adequate, answered with 'acceptable' for grammatically correct sentences or 'unacceptable' for deceptive ones. ", + "transfer_ori_acc": 74.2, + "transfer_atk_acc": 73.3 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 73.0, + "origin_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 59.0, + "atk_prompt": "Appraisal if the two accorded sentences enjoys equivalent connotation by choice 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 82.84313725490196, + "transfer_atk_acc": 83.08823529411765 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 71.5, + "origin_prompt": "In the role of a textual comparison specialist, examine the given sentences and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 57.0, + "atk_prompt": "De the role of a textual comparisons scholar, revisited the given sentences and decide if they exchange the same feel, responding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 79.65686274509804, + "transfer_atk_acc": 79.16666666666666 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 84.5, + "origin_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 63.5, + "atk_prompt": "Discuss the background and question, and determine if the framework intuitively presupposes the answer by akhtar 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 86.5, + "transfer_atk_acc": 80.30000000000001 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 85.5, + "origin_prompt": "As a textual analyst, examine if the given context logically implies the answer to the question and indicate your decision with 'entailment' or 'not_entailment'.", + "atk_acc": 68.5, + "atk_prompt": "Iike a textual analyst, analyze if the submitted regard rationally implies the replies to the issuing and indicates your clout with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 88.8, + "transfer_atk_acc": 86.1 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 83.5, + "origin_prompt": "Determine if the given pair of statements can be considered the same by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 80.0, + "atk_prompt": "Determine if the given pair of representations can constitute felt the same by responding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 86.8, + "transfer_atk_acc": 86.9 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 83.0, + "origin_prompt": "Acting as a question equivalence instrument, determine if the provided questions are equivalent in meaning, answering with 'equivalent' for similar questions or 'not_equivalent' for dissimilar ones.", + "atk_acc": 80.0, + "atk_prompt": "Interim as a issues equivalence instrument, determine if the provided aspects are equivalent in meaning, answering with 'equivalent' for selfsame questions or 'not_equivalent' for dissimilar ones. ", + "transfer_ori_acc": 86.5, + "transfer_atk_acc": 86.4 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 84.5, + "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 79.5, + "atk_prompt": "Do the coitus between the given sentences comprised entailment or not_entailment? Meet with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 83.75451263537906, + "transfer_atk_acc": 81.94945848375451 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 83.0, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 79.5, + "atk_prompt": "As an entailment identification system, revisited the connection between the following sentences and respondent with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 82.67148014440433, + "transfer_atk_acc": 83.03249097472924 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.0, + "origin_prompt": "Read the provided excerpt and choose between 'positive' and 'negative' to describe its sentiment.", + "atk_acc": 85.0, + "atk_prompt": "Lu the rendered excerpt and withheld between 'positive' and 'negative' to describe its foreboding. ", + "transfer_ori_acc": 94.72477064220183, + "transfer_atk_acc": 95.18348623853211 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 97.5, + "origin_prompt": "Emulating a sentiment classification system, indicate whether the provided text is 'positive' or 'negative'.", + "atk_acc": 91.5, + "atk_prompt": "Emulating a ambience classification mechanisms, describe whether the presented writings is 'positive' or 'negative'. ", + "transfer_ori_acc": 94.95412844036697, + "transfer_atk_acc": 95.29816513761467 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 69.01, + "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 59.15, + "atk_prompt": "Indicate if the connection between the subsequently sentences is 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 64.7887323943662, + "transfer_atk_acc": 61.97183098591549 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 57.75, + "atk_prompt": "In the role of a semantic contacts analyzer, analysed the connecting between the given sentences and choose if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 56.33802816901409, + "transfer_atk_acc": 54.929577464788736 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 67.0, + "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.", + "atk_acc": 60.0, + "atk_prompt": "Asking reactions to the multiple-choice question about {} by selecting the appropriate reacts: 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 45.21276595744681, + "transfer_atk_acc": 43.97163120567376 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 66.0, + "origin_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 61.5, + "atk_prompt": "From your acquaintances of {}, addressing the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 46.09929078014184, + "transfer_atk_acc": 45.56737588652482 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 72.8, + "origin_prompt": "Using the context, determine the most suitable answer. If the context doesn't contain the answer, write 'unanswerable'.", + "atk_acc": 63.67, + "atk_prompt": "Using the context, identifies the most suitable answer. Although the context doesn't comprise the meet, editor 'unanswerable'.", + "transfer_ori_acc": 87.04992770387507, + "transfer_atk_acc": 87.31659437054175 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 71.93, + "origin_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable'.", + "atk_acc": 64.13, + "atk_prompt": "As a knowledge expert in the given context, find for the most relevant answering. Unless the answer cannot exist found, respond by told 'unanswerable'.", + "transfer_ori_acc": 87.31659437054175, + "transfer_atk_acc": 87.51356406751144 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.23, + "origin_prompt": "Given the sentence below, perform a machine translation from {} to {}.", + "atk_acc": 0.18, + "atk_prompt": "Attributed the sentence below, meet a equipments interpreters from {} to {}. ", + "transfer_ori_acc": 0.21292285800980326, + "transfer_atk_acc": 0.2153933980722541 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.24, + "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}.", + "atk_acc": 0.23, + "atk_prompt": "Of the features of a language expert, performed a machine translation for the presented sentence, variation it from {} to {}. ", + "transfer_ori_acc": 0.22070346732323043, + "transfer_atk_acc": 0.21889644783553872 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.35, + "origin_prompt": "Translate the following text from the source language {} to the target language {}.", + "atk_acc": 0.33, + "atk_prompt": "Translate the following texts from the source lingo {} to the millennium phrasing {}. ", + "transfer_ori_acc": 0.3090347352409074, + "transfer_atk_acc": 0.30701960419660657 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.35, + "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}.", + "atk_acc": 0.34, + "atk_prompt": "As a nuanced translation, metamorphosis the gave sentence from its original {} terminology to the aspirations grammar {}. ", + "transfer_ori_acc": 0.31127691224033316, + "transfer_atk_acc": 0.30631346636342727 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 23.75, + "origin_prompt": "Compute the result of this math task concerning {}.", + "atk_acc": 1.88, + "atk_prompt": "Estimate the accomplishments of this math task concerning {}. ", + "transfer_ori_acc": 16.25, + "transfer_atk_acc": 15.625 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 22.5, + "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}.", + "atk_acc": 1.88, + "atk_prompt": "As a computations instructor, derive the rebuttal to the following hassles links to {}. ", + "transfer_ori_acc": 17.5, + "transfer_atk_acc": 17.5 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_google-flan-t5-large/translation_0_shot.json b/results_transfer/chatgpt_google-flan-t5-large/translation_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..5aa360d5c314fc07aefac86196d6b1c6d4e78f71 --- /dev/null +++ b/results_transfer/chatgpt_google-flan-t5-large/translation_0_shot.json @@ -0,0 +1,122 @@ +[ + { + "dataset": "cola", + "type": "translation", + "origin_acc": 79.5, + "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable':", + "atk_acc": 25.0, + "atk_prompt": "Classify the grammar of a given sentence into two categories, 'acceptable' or 'unacceptable', and judge its accuracy.", + "transfer_ori_acc": 76.3, + "transfer_atk_acc": 75.3 + }, + { + "dataset": "mrpc", + "type": "translation", + "origin_acc": 76.5, + "origin_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent'.", + "atk_acc": 7.0, + "atk_prompt": "As an expert in semantic comparison, assess the similarity between the following two sentences and determine whether they are 'equivalent' or 'not_equivalent'.", + "transfer_ori_acc": 80.63725490196079, + "transfer_atk_acc": 83.82352941176471 + }, + { + "dataset": "qnli", + "type": "translation", + "origin_acc": 79.5, + "origin_prompt": "Assess if the answer to the question can be logically concluded from the provided context by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 56.5, + "atk_prompt": "Check the text link to the question and answer 'entailment' or 'not_entailment', depending on your language skills.", + "transfer_ori_acc": 84.8, + "transfer_atk_acc": 56.3 + }, + { + "dataset": "qqp", + "type": "translation", + "origin_acc": 84.0, + "origin_prompt": "Determine if the given pair of statements can be considered the same by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 3.0, + "atk_prompt": "As a tool for determining the equivalence of problems, the problems are examined and their similarity is classified as 'equivalent' or 'not_equivalent'.", + "transfer_ori_acc": 87.6, + "transfer_atk_acc": 72.0 + }, + { + "dataset": "rte", + "type": "translation", + "origin_acc": 84.0, + "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 39.0, + "atk_prompt": "Determine whether the given sentences have a cause-effect relationship as an engagement analysis tool and categorize them as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 84.11552346570397, + "transfer_atk_acc": 85.5595667870036 + }, + { + "dataset": "sst2", + "type": "translation", + "origin_acc": 96.0, + "origin_prompt": "Read the provided excerpt and choose between 'positive' and 'negative' to describe its sentiment:", + "atk_acc": 29.5, + "atk_prompt": "In the context of textual analysis, what classification is this phrase between 'positive' and 'negative'?", + "transfer_ori_acc": 94.83944954128441, + "transfer_atk_acc": 94.15137614678899 + }, + { + "dataset": "wnli", + "type": "translation", + "origin_acc": 67.61, + "origin_prompt": "Examine the pair of sentences and determine if they exhibit entailment or not_entailment. Answer with either 'entailment' or 'not_entailment'.", + "atk_acc": 26.76, + "atk_prompt": "Using implication analysis, determine whether the given sentences have a cause-effect relationship and categorize them as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 61.97183098591549, + "transfer_atk_acc": 59.154929577464785 + }, + { + "dataset": "mmlu", + "type": "translation", + "origin_acc": 64.0, + "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 49.5, + "atk_prompt": "Choose the correct answer to the multiple-choice question below. Is '{}' an 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 44.858156028368796, + "transfer_atk_acc": 43.262411347517734 + }, + { + "dataset": "squad_v2", + "type": "translation", + "origin_acc": 72.37, + "origin_prompt": "Discover the best answer based on the context. If the context doesn't include an answer, respond with 'unanswerable'.", + "atk_acc": 46.08, + "atk_prompt": "Please tell me what your question is about. If there is no context in which you can provide an answer, respond with 'unanswerable'.", + "transfer_ori_acc": 87.54992770387507, + "transfer_atk_acc": 87.42265497660236 + }, + { + "dataset": "iwslt", + "type": "translation", + "origin_acc": 0.22, + "origin_prompt": "Using machine translation, convert the given sentence from {} into the {} language:", + "atk_acc": 0.11, + "atk_prompt": "Please convert the sentence below to {}, and then translate it to {},", + "transfer_ori_acc": 0.13366423747896072, + "transfer_atk_acc": 0.13525124600739777 + }, + { + "dataset": "un_multi", + "type": "translation", + "origin_acc": 0.34, + "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}:", + "atk_acc": 0.16, + "atk_prompt": "Please convert the sentence below to {}, and then translate it to {},", + "transfer_ori_acc": 0.26814405340158265, + "transfer_atk_acc": 0.2698370828666449 + }, + { + "dataset": "math", + "type": "translation", + "origin_acc": 6.88, + "origin_prompt": "Calculate the answer to the following math query about {}:", + "atk_acc": 3.75, + "atk_prompt": "Use {} to solve the following mathematical problem.", + "transfer_ori_acc": 13.125, + "transfer_atk_acc": 15.625 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_google-flan-t5-large/translation_3_shot.json b/results_transfer/chatgpt_google-flan-t5-large/translation_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..21d0caba27c4ee973ff13266cc61542dabb40f1c --- /dev/null +++ b/results_transfer/chatgpt_google-flan-t5-large/translation_3_shot.json @@ -0,0 +1,122 @@ +[ + { + "dataset": "cola", + "type": "translation", + "origin_acc": 81.5, + "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable'.", + "atk_acc": 68.5, + "atk_prompt": "Classify the grammar of a given sentence as 'acceptable' or 'unacceptable', and judge its accuracy.", + "transfer_ori_acc": 75.2, + "transfer_atk_acc": 75.8 + }, + { + "dataset": "mrpc", + "type": "translation", + "origin_acc": 74.5, + "origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 65.0, + "atk_prompt": "Based on my experience in semantic analysis, classify the following two sentences between 'equivalent' or 'not_equivalent'.", + "transfer_ori_acc": 80.3921568627451, + "transfer_atk_acc": 81.61764705882352 + }, + { + "dataset": "qnli", + "type": "translation", + "origin_acc": 86.0, + "origin_prompt": "As a textual analyst, examine if the given context logically implies the answer to the question and indicate your decision with 'entailment' or 'not_entailment'.", + "atk_acc": 71.5, + "atk_prompt": "Check the text link to the question and answer 'entailment' or 'not_entailment', depending on your language skills.", + "transfer_ori_acc": 88.8, + "transfer_atk_acc": 56.699999999999996 + }, + { + "dataset": "qqp", + "type": "translation", + "origin_acc": 84.0, + "origin_prompt": "In the capacity of a question assessment system, indicate if the meaning of the provided questions is the same, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 80.5, + "atk_prompt": "By analyzing the similarity of these problems, they are divided into categories of 'equivalent' or 'not_equivalent'.", + "transfer_ori_acc": 86.1, + "transfer_atk_acc": 87.5 + }, + { + "dataset": "rte", + "type": "translation", + "origin_acc": 84.5, + "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 80.0, + "atk_prompt": "Determine whether the given sentences have a cause-effect relationship as an engagement analysis tool and categorize them as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 83.75451263537906, + "transfer_atk_acc": 84.47653429602889 + }, + { + "dataset": "sst2", + "type": "translation", + "origin_acc": 97.5, + "origin_prompt": "In the capacity of a sentiment classifier, decide whether the given quote is 'positive' or 'negative'.", + "atk_acc": 68.5, + "atk_prompt": "In the context of textual analysis, what classification is this phrase between 'positive' and 'negative'?", + "transfer_ori_acc": 94.95412844036697, + "transfer_atk_acc": 94.61009174311926 + }, + { + "dataset": "wnli", + "type": "translation", + "origin_acc": 74.65, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 59.15, + "atk_prompt": "Determinate the semantic connections of a given sentence and classify it as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 67.6056338028169, + "transfer_atk_acc": 54.929577464788736 + }, + { + "dataset": "mmlu", + "type": "translation", + "origin_acc": 67.0, + "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.", + "atk_acc": 63.0, + "atk_prompt": "Rate your understanding of the multiple-choice question on {}. Choose the correct answer from options 'A', 'B', 'C' or 'D'.", + "transfer_ori_acc": 45.21276595744681, + "transfer_atk_acc": 45.56737588652482 + }, + { + "dataset": "squad_v2", + "type": "translation", + "origin_acc": 74.58, + "origin_prompt": "Please derive the most fitting answer from the context. If there isn't an answer in the context, use 'unanswerable'.", + "atk_acc": 67.46, + "atk_prompt": "Please provide additional information about the facts mentioned in this sentence. If no information is available, respond with 'unanswerable'.", + "transfer_ori_acc": 87.04992770387507, + "transfer_atk_acc": 86.92265497660236 + }, + { + "dataset": "iwslt", + "type": "translation", + "origin_acc": 0.24, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.", + "atk_acc": 0.17, + "atk_prompt": "Please translate the sentences below into {}, then {}.", + "transfer_ori_acc": 0.2240211126734209, + "transfer_atk_acc": 0.20150103267232944 + }, + { + "dataset": "un_multi", + "type": "translation", + "origin_acc": 0.36, + "origin_prompt": "Translate the following text from the source language {} to the target language {}.", + "atk_acc": 0.25, + "atk_prompt": "Please translate the following sentences into {}, then {}.", + "transfer_ori_acc": 0.3090347352409074, + "transfer_atk_acc": 0.3050833075162871 + }, + { + "dataset": "math", + "type": "translation", + "origin_acc": 23.12, + "origin_prompt": "Compute the result of this math task concerning {}.", + "atk_acc": 18.75, + "atk_prompt": "Please calculate the answers related to {}.", + "transfer_ori_acc": 16.25, + "transfer_atk_acc": 16.875 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_google-flan-ul2/bertattack_0_shot.json b/results_transfer/chatgpt_google-flan-ul2/bertattack_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..997fefe40a5384090732fda61f3b679a7a53acf3 --- /dev/null +++ b/results_transfer/chatgpt_google-flan-ul2/bertattack_0_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 78.5, + "origin_prompt": "Assess the grammatical structure of the given sentence and classify it as 'Acceptable' or 'Unacceptable':", + "atk_acc": 0.0, + "atk_prompt": "what the grammatical structure of the given sentence and consider it as 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 85.39999999999999, + "transfer_atk_acc": 85.5 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 80.0, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 0.0, + "atk_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for so grammar or 'unacceptable' for abnormal grammar:", + "transfer_ori_acc": 86.0, + "transfer_atk_acc": 85.5 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 75.5, + "origin_prompt": "Examine the pair of sentences and decide if their meanings are identical by answering with 'equivalent' or 'not_equivalent'.", + "atk_acc": 56.0, + "atk_prompt": "Examine the pairs of pronouns and evaluate if their meanings are different by answering with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 86.02941176470588, + "transfer_atk_acc": 84.06862745098039 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 75.5, + "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.0, + "atk_prompt": "In your capacity as a semantic assessment tool, evaluate the provided tense and classify their constituents as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 83.33333333333334, + "transfer_atk_acc": 83.08823529411765 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 79.0, + "origin_prompt": "Assess if the answer to the question can be logically concluded from the provided context by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "and if the answer to the question can be logically concluded from the provided context by avoiding 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 94.0, + "transfer_atk_acc": 77.3 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 79.5, + "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "As a textual inference expert, analysis if the answer to the question can be deduced from the provided citation and define 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 93.30000000000001, + "transfer_atk_acc": 94.3 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 79.5, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 1.0, + "atk_prompt": "Can these two constructions be considered equal in meaning? object with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.4, + "transfer_atk_acc": 87.1 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 82.5, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 0.0, + "atk_prompt": "give an table for category comparison evaluation, consider the questions and determine if their concept is the same, responding with 'equivalent' for similarity questions or 'not_equivalent' for different knowledge. ", + "transfer_ori_acc": 88.3, + "transfer_atk_acc": 89.2 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 82.5, + "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 55.5, + "atk_prompt": "determine if the following singular of sentence indicating entailment or not_entailment. contrast with 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 93.50180505415162, + "transfer_atk_acc": 89.53068592057761 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 81.5, + "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 37.5, + "atk_prompt": "at as an entailment classifier, know which the given type of verb displays entailment or not_entailment. object with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 94.58483754512635, + "transfer_atk_acc": 90.25270758122743 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 90.0, + "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment:", + "atk_acc": 0.0, + "atk_prompt": "what this assertion and are whether it has a 'positive' or 'negative' sentiment: ", + "transfer_ori_acc": 96.55963302752293, + "transfer_atk_acc": 96.44495412844036 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 93.0, + "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:", + "atk_acc": 76.0, + "atk_prompt": "from as a sentiments identification tool, assess if the same word is 'positive' or 'negative'. function: ", + "transfer_ori_acc": 95.9862385321101, + "transfer_atk_acc": 95.52752293577981 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 69.01, + "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 47.89, + "atk_prompt": "Identify whether the given individual of verbs demonstrates entailment or not_entailment. have with 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 78.87323943661971, + "transfer_atk_acc": 71.83098591549296 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 60.56, + "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 47.89, + "atk_prompt": "Working as an entailment classifier, identify whether the given corpus of verbs displays entailment or not_entailment. act with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 80.28169014084507, + "transfer_atk_acc": 69.01408450704226 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 63.5, + "origin_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D'.", + "atk_acc": 14.5, + "atk_prompt": "1 the first multiple-choice question on {}, remove the following answer from the options 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 52.304964539007095, + "transfer_atk_acc": 15.957446808510639 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 62.0, + "origin_prompt": "Drawing upon your mastery of {}, please answer the multiple-choice question by selecting the correct option from 'A', 'B', 'C', or 'D'.", + "atk_acc": 1.0, + "atk_prompt": "Drawing upon your mastery of {}, simply question the multiple-selecting question by selecting the wrong color from 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 52.659574468085104, + "transfer_atk_acc": 14.893617021276595 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 72.47, + "origin_prompt": "Discover the best answer based on the context. If the context doesn't include an answer, respond with 'unanswerable'.", + "atk_acc": 2.76, + "atk_prompt": "challenge the best answer based on the context. however the context doesn't include an answer, play with 'unanswerable'.", + "transfer_ori_acc": 89.38229949874687, + "transfer_atk_acc": 89.88229949874687 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 65.63, + "origin_prompt": "Drawing on your expertise in the context, provide the most precise answer. If the answer is not in the context, respond with 'unanswerable'.", + "atk_acc": 1.86, + "atk_prompt": "Drawing on your technique in the frame, offer the most precise answer. saying the answer is not in the context, play with 'unanswerable'.", + "transfer_ori_acc": 89.48229949874687, + "transfer_atk_acc": 89.98229949874687 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.22, + "origin_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context:", + "atk_acc": 0.08, + "atk_prompt": "Translate the designated sentence from {} to {} while defending the assumed meaning and context:", + "transfer_ori_acc": 0.1697518407093131, + "transfer_atk_acc": 0.17006977668295734 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.22, + "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}:", + "atk_acc": 0.01, + "atk_prompt": "who on the role of a certified translated, request a translator of the defined sentence from {} to {}: ", + "transfer_ori_acc": 0.1727120569025014, + "transfer_atk_acc": 0.16908374849908425 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.34, + "origin_prompt": "Translate the following text from the source language {} to the target language {}:", + "atk_acc": 0.18, + "atk_prompt": "Translate the by text from the source language {} to the english example {}:", + "transfer_ori_acc": 0.30893486445367974, + "transfer_atk_acc": 0.31345451169084315 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.34, + "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}:", + "atk_acc": 0.08, + "atk_prompt": "of the role of a language expertise, define a machine and for the presented sentence, changing it from {} to {}: ", + "transfer_ori_acc": 0.31256243540663714, + "transfer_atk_acc": 0.3081120174260553 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 6.25, + "origin_prompt": "Resolve the following mathematical question associated with {}:", + "atk_acc": 0.62, + "atk_prompt": "Resolve the following mathematical answer along with {}:", + "transfer_ori_acc": 14.374999999999998, + "transfer_atk_acc": 13.750000000000002 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 5.0, + "origin_prompt": "In the position of a mathematics coach, figure out the answer to this math challenge involving {}:", + "atk_acc": 0.0, + "atk_prompt": "In the position of a mathematics coach, develop out the theme to this math challenge involving {}: ", + "transfer_ori_acc": 10.625, + "transfer_atk_acc": 3.75 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_google-flan-ul2/bertattack_3_shot.json b/results_transfer/chatgpt_google-flan-ul2/bertattack_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..fd9641470050f1a4c751af55c07a0c2819ad90c7 --- /dev/null +++ b/results_transfer/chatgpt_google-flan-ul2/bertattack_3_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 81.0, + "origin_prompt": "Analyze the provided sentence and classify its grammatical correctness as 'Acceptable' or 'Unacceptable'.", + "atk_acc": 59.0, + "atk_prompt": "Analyze the reported speech and classify its inclusive correcting as 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 86.0, + "transfer_atk_acc": 78.0 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 81.0, + "origin_prompt": "In the role of a syntax analyzer, examine the grammar of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones.", + "atk_acc": 75.5, + "atk_prompt": "using the art of a level analyzeg, examine the grammar of the provided sentence and decide if it is genuine, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones. ", + "transfer_ori_acc": 86.5, + "transfer_atk_acc": 86.3 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 74.0, + "origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 47.5, + "atk_prompt": "Compare the following sentences and determine if they lack the same aspect marking by respectively with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 86.27450980392157, + "transfer_atk_acc": 72.05882352941177 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 71.5, + "origin_prompt": "In the role of a textual comparison specialist, examine the given sentences and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 47.0, + "atk_prompt": "have the role of a textual comparison academic, examine the given sentences and determine if they share the same length, responding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.00980392156863, + "transfer_atk_acc": 86.02941176470588 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 84.0, + "origin_prompt": "Consider the context and question, and indicate if the answer can be logically deduced from the context by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 71.0, + "atk_prompt": "put the contexts and question, and be if the answer can be logically deduced from the question by stating with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 92.9, + "transfer_atk_acc": 93.30000000000001 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 84.0, + "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.", + "atk_acc": 70.0, + "atk_prompt": "As a structural inference expert, analyze if the answer to the challenge can be deduced from the query audience and select 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 93.0, + "transfer_atk_acc": 94.1 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 82.5, + "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", + "atk_acc": 78.5, + "atk_prompt": "Are the following versus answers equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\". ", + "transfer_ori_acc": 88.5, + "transfer_atk_acc": 88.5 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 83.5, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 78.0, + "atk_prompt": "and an instrument for question comparison analysis, consider the questions and establish if their topic is the same, responding with 'equivalent' for matching responses or 'not_equivalent' for between questions. ", + "transfer_ori_acc": 89.0, + "transfer_atk_acc": 90.7 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 85.0, + "origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.", + "atk_acc": 59.5, + "atk_prompt": "write the second statements and categorize their relationship as neither 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 92.7797833935018, + "transfer_atk_acc": 92.7797833935018 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 83.5, + "origin_prompt": "While performing entailment analysis, classify the relationship between the provided sentences as 'entailment' or 'not_entailment'.", + "atk_acc": 79.0, + "atk_prompt": "was performing entailment analysis, classify the relationship between the first phrase as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 93.14079422382672, + "transfer_atk_acc": 93.14079422382672 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 95.5, + "origin_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative'.", + "atk_acc": 94.5, + "atk_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative' Q47Jmd4lMV. ", + "transfer_ori_acc": 92.77522935779817, + "transfer_atk_acc": 92.66055045871559 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 97.5, + "origin_prompt": "In the capacity of a sentiment classifier, decide whether the given quote is 'positive' or 'negative'.", + "atk_acc": 96.5, + "atk_prompt": "In the capacity of a sentiment classifier, decide whether the given quote is 'positive' or 'negative' 9Q3S1F94fE. ", + "transfer_ori_acc": 96.44495412844036, + "transfer_atk_acc": 96.44495412844036 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 69.01, + "origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.", + "atk_acc": 59.15, + "atk_prompt": "examine the paired actions and categorize their relationship as is 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 77.46478873239437, + "transfer_atk_acc": 76.05633802816901 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 56.34, + "atk_prompt": "In the role of a semantic relationship analyzer, define the connection between the given links and decide if they display entailment or not_entailment. Answer with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 76.05633802816901, + "transfer_atk_acc": 76.05633802816901 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 66.5, + "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.", + "atk_acc": 11.5, + "atk_prompt": "at respond to the multiple-independent question about {} by selecting the inappropriate question: 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 53.36879432624113, + "transfer_atk_acc": 14.893617021276595 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 65.5, + "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 52.5, + "atk_prompt": "all your ability with {}, attend to the following multiple-select question by picking 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 54.43262411347518, + "transfer_atk_acc": 54.07801418439716 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 72.25, + "origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, reply with 'unanswerable'.", + "atk_acc": 65.81, + "atk_prompt": "Based on the use context, provide the best possible answer. who there'tis no answer provided in the area, reply with 'unanswerable'.", + "transfer_ori_acc": 90.04896616541353, + "transfer_atk_acc": 90.54896616541353 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 72.45, + "origin_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable'.", + "atk_acc": 61.02, + "atk_prompt": "As a proficient expert in the given context, try for the most specific out. If the question lacks be search, end by as 'unanswerable'.", + "transfer_ori_acc": 90.54896616541353, + "transfer_atk_acc": 90.55717634603751 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.24, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.", + "atk_acc": 0.21, + "atk_prompt": "Convert the following sentence from its actual {} language to the net language {}. ", + "transfer_ori_acc": 0.23981220411098694, + "transfer_atk_acc": 0.22991007113697723 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.24, + "origin_prompt": "Assuming the responsibilities of a professional translator, translate the subsequent text passage from {} to {}.", + "atk_acc": 0.17, + "atk_prompt": "what the responsibilities of a professional scholar, translate the subsequent text passage from {} to {}. ", + "transfer_ori_acc": 0.2676477143562027, + "transfer_atk_acc": 0.23524703512046088 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.36, + "origin_prompt": "Translate the following text from the source language {} to the target language {}.", + "atk_acc": 0.35, + "atk_prompt": "Translate the following area from the standard language {} to the target language {}. ", + "transfer_ori_acc": 0.35343571054545636, + "transfer_atk_acc": 0.3527277831126269 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.35, + "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}.", + "atk_acc": 0.26, + "atk_prompt": "As a qualified proper, converts the target sentence from its original {} language to the english language {}. ", + "transfer_ori_acc": 0.35395992027011125, + "transfer_atk_acc": 0.35538909869030366 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 22.5, + "origin_prompt": "Compute the result of this math task concerning {}.", + "atk_acc": 3.12, + "atk_prompt": "Compute the degree of this math task concerning {}. ", + "transfer_ori_acc": 16.25, + "transfer_atk_acc": 14.374999999999998 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 22.5, + "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}.", + "atk_acc": 7.5, + "atk_prompt": "as a mathematics instructor, evaluate the reply to the following problem and to {}. ", + "transfer_ori_acc": 15.625, + "transfer_atk_acc": 15.0 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_google-flan-ul2/checklist_0_shot.json b/results_transfer/chatgpt_google-flan-ul2/checklist_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..6d71ef52183959b1a7a7a6d505bb9ae174cac3e3 --- /dev/null +++ b/results_transfer/chatgpt_google-flan-ul2/checklist_0_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 78.5, + "origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable':", + "atk_acc": 72.5, + "atk_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable' zexKUTWJYG:", + "transfer_ori_acc": 86.0, + "transfer_atk_acc": 84.7 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 79.0, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 45.0, + "atk_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar zexKUTWJYG:", + "transfer_ori_acc": 86.0, + "transfer_atk_acc": 82.19999999999999 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 75.5, + "origin_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 72.0, + "atk_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent' SvAp8RlOFn. ", + "transfer_ori_acc": 86.27450980392157, + "transfer_atk_acc": 85.7843137254902 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 75.5, + "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.", + "atk_acc": 73.5, + "atk_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent' Kw6nrs57gH. ", + "transfer_ori_acc": 83.33333333333334, + "transfer_atk_acc": 83.82352941176471 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 78.0, + "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 75.5, + "atk_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment' ItGDrrA1Sc. ", + "transfer_ori_acc": 93.89999999999999, + "transfer_atk_acc": 94.39999999999999 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 78.0, + "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", + "atk_acc": 74.5, + "atk_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment' 6a4Yn3RGVc. ", + "transfer_ori_acc": 94.69999999999999, + "transfer_atk_acc": 94.3 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 81.5, + "origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 78.0, + "atk_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent' iia2yL9Uzm. ", + "transfer_ori_acc": 87.7, + "transfer_atk_acc": 88.4 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 83.0, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 80.5, + "atk_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions Cev0s49fbe. ", + "transfer_ori_acc": 88.3, + "transfer_atk_acc": 88.5 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 82.5, + "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 80.0, + "atk_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment' MSAddJ4D2a.", + "transfer_ori_acc": 93.50180505415162, + "transfer_atk_acc": 93.50180505415162 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 81.5, + "origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 80.0, + "atk_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment' 6a4Yn3RGVc:", + "transfer_ori_acc": 93.50180505415162, + "transfer_atk_acc": 94.22382671480143 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 93.0, + "origin_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative':", + "atk_acc": 90.0, + "atk_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative' AMsRIKZniY: ", + "transfer_ori_acc": 96.67431192660551, + "transfer_atk_acc": 96.3302752293578 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 92.5, + "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:", + "atk_acc": 92.0, + "atk_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify Cev0s49fbe: ", + "transfer_ori_acc": 95.9862385321101, + "transfer_atk_acc": 96.3302752293578 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 64.79, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 59.15, + "atk_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word zexKUTWJYG. ", + "transfer_ori_acc": 78.87323943661971, + "transfer_atk_acc": 78.87323943661971 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 63.38, + "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':", + "atk_acc": 57.75, + "atk_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment' 5XeflW1ZJc:", + "transfer_ori_acc": 77.46478873239437, + "transfer_atk_acc": 78.87323943661971 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 64.0, + "origin_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D'.", + "atk_acc": 58.0, + "atk_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D' Pbl3tYuoRo. ", + "transfer_ori_acc": 52.304964539007095, + "transfer_atk_acc": 52.4822695035461 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 61.5, + "origin_prompt": "As a skilled person in the field of {}, please respond to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 58.0, + "atk_prompt": "As a skilled person in the field of {}, please respond to the multiple-choice question by choosing 'A', 'B', 'C', or 'D' IhuBIhoPGc.", + "transfer_ori_acc": 52.4822695035461, + "transfer_atk_acc": 53.72340425531915 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 69.16, + "origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, respond with 'unanswerable'.", + "atk_acc": 64.99, + "atk_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, respond with 'unanswerable' ItGDrrA1Sc.", + "transfer_ori_acc": 89.38229949874687, + "transfer_atk_acc": 89.31979949874687 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 64.31, + "origin_prompt": "As an expert with a deep understanding of the context, find the best answer. If the context doesn't include an answer, say 'unanswerable'.", + "atk_acc": 60.2, + "atk_prompt": "As an expert with a deep understanding of the context, find the best answer. If the context doesn't include an answer, say 'unanswerable' d6ZQ3u0GBQ.", + "transfer_ori_acc": 89.44479949874687, + "transfer_atk_acc": 89.81979949874687 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.22, + "origin_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context:", + "atk_acc": 0.21, + "atk_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context zq0DcZ5dnI:", + "transfer_ori_acc": 0.1697518407093131, + "transfer_atk_acc": 0.17419805969910362 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.22, + "origin_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to {}:", + "atk_acc": 0.21, + "atk_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to SFPCqpiClT {}: ", + "transfer_ori_acc": 0.17223574608131062, + "transfer_atk_acc": 0.16640042157073662 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.34, + "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}:", + "atk_acc": 0.33, + "atk_prompt": "Perform a machine translation for the provided sentence, changing it from {} to Kw6nrs57gH {}:", + "transfer_ori_acc": 0.3102455120610509, + "transfer_atk_acc": 0.30087524214938727 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.34, + "origin_prompt": "While acting as an experienced language expert, translate the following text from {} to {}:", + "atk_acc": 0.33, + "atk_prompt": "While acting as an experienced language expert, translate the following text from {} to Kw6nrs57gH {}: ", + "transfer_ori_acc": 0.3133010160127483, + "transfer_atk_acc": 0.2966985644521811 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 6.88, + "origin_prompt": "Calculate the answer to the following math query about {}:", + "atk_acc": 5.0, + "atk_prompt": "Calculate the answer to the following math query about 6a4Yn3RGVc {}:", + "transfer_ori_acc": 14.374999999999998, + "transfer_atk_acc": 13.750000000000002 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 5.0, + "origin_prompt": "Embodying a mathematics advisor, find the answer to the following math problem with {}:", + "atk_acc": 2.5, + "atk_prompt": "Embodying a mathematics advisor, find the answer to the following math problem with 9vGXgnbWB8 {}: ", + "transfer_ori_acc": 13.125, + "transfer_atk_acc": 12.5 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_google-flan-ul2/checklist_3_shot.json b/results_transfer/chatgpt_google-flan-ul2/checklist_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..9eb306831c4eb1ad8075ee6de86ed92dc3124edd --- /dev/null +++ b/results_transfer/chatgpt_google-flan-ul2/checklist_3_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 81.0, + "origin_prompt": "Examine the sentence and decide if its grammar is 'Acceptable' or 'Unacceptable'.", + "atk_acc": 78.0, + "atk_prompt": "Examine the sentence and decide if its grammar is 'Acceptable' or 'Unacceptable' 9Q3S1F94fE. ", + "transfer_ori_acc": 85.9, + "transfer_atk_acc": 86.8 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 81.5, + "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable'.", + "atk_acc": 79.5, + "atk_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable' 6a4Yn3RGVc. ", + "transfer_ori_acc": 86.4, + "transfer_atk_acc": 86.8 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 74.0, + "origin_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 68.5, + "atk_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent' LkF0FZxMZ4. ", + "transfer_ori_acc": 84.06862745098039, + "transfer_atk_acc": 84.06862745098039 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 72.0, + "origin_prompt": "As a linguistic comparator, review the following pair of sentences and determine their semantic equivalence by choosing 'equivalent' or 'not_equivalent'.", + "atk_acc": 68.0, + "atk_prompt": "As a linguistic comparator, review the following pair of sentences and determine their semantic equivalence by choosing 'equivalent' or 'not_equivalent' bhrRSokrfa. ", + "transfer_ori_acc": 85.5392156862745, + "transfer_atk_acc": 85.7843137254902 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 84.5, + "origin_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 80.5, + "atk_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment' 9Q3S1F94fE. ", + "transfer_ori_acc": 93.5, + "transfer_atk_acc": 93.2 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 84.5, + "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.", + "atk_acc": 81.0, + "atk_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment' YHQiDfFhw4. ", + "transfer_ori_acc": 93.0, + "transfer_atk_acc": 93.0 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 82.0, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 79.0, + "atk_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent' xikCjkMydH. ", + "transfer_ori_acc": 87.6, + "transfer_atk_acc": 87.7 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 83.0, + "origin_prompt": "In the capacity of a question assessment system, indicate if the meaning of the provided questions is the same, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 81.5, + "atk_prompt": "In the capacity of a question assessment system, indicate if the meaning of the provided questions is the same, responding with 'equivalent' or 'not_equivalent' 5XeflW1ZJc. ", + "transfer_ori_acc": 88.7, + "transfer_atk_acc": 89.1 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 84.5, + "origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.", + "atk_acc": 80.5, + "atk_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment' 5XeflW1ZJc. ", + "transfer_ori_acc": 92.7797833935018, + "transfer_atk_acc": 91.69675090252709 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.0, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 80.5, + "atk_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment' bhrRSokrfa. ", + "transfer_ori_acc": 92.4187725631769, + "transfer_atk_acc": 93.50180505415162 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.0, + "origin_prompt": "Read the provided excerpt and choose between 'positive' and 'negative' to describe its sentiment.", + "atk_acc": 77.5, + "atk_prompt": "go the read excerpt and be between 'positive' and 'negative' to describing its mood. ", + "transfer_ori_acc": 96.67431192660551, + "transfer_atk_acc": 96.78899082568807 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 97.5, + "origin_prompt": "In the capacity of a sentiment classifier, decide whether the given quote is 'positive' or 'negative'.", + "atk_acc": 13.5, + "atk_prompt": "in the capacity of a sentiment classizer, chose neither the said quotes is 'positive' or 'negative'. ", + "transfer_ori_acc": 96.44495412844036, + "transfer_atk_acc": 91.97247706422019 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 69.01, + "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 61.97, + "atk_prompt": "Indicate if the connection between the followping senZences is 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 76.05633802816901, + "transfer_atk_acc": 76.05633802816901 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 57.75, + "atk_prompt": "In your role as an entailment analysis tool, asress the rlationship between the given sentences and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 77.46478873239437, + "transfer_atk_acc": 74.64788732394366 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 66.0, + "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.", + "atk_acc": 63.0, + "atk_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D' MQnugHcaoy. ", + "transfer_ori_acc": 53.36879432624113, + "transfer_atk_acc": 53.01418439716312 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 65.5, + "origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 63.0, + "atk_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D' mOJ9nEwT4f.", + "transfer_ori_acc": 53.01418439716312, + "transfer_atk_acc": 53.72340425531915 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 73.25, + "origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, reply with 'unanswerable'.", + "atk_acc": 71.09, + "atk_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, reply with 'unanswerable' ItGDrrA1Sc.", + "transfer_ori_acc": 90.04896616541353, + "transfer_atk_acc": 90.54896616541353 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 72.63, + "origin_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable'.", + "atk_acc": 63.07, + "atk_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable' XStsD2pUzu.", + "transfer_ori_acc": 90.54896616541353, + "transfer_atk_acc": 90.04896616541353 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.24, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.", + "atk_acc": 0.22, + "atk_prompt": "Convert the following sentence from its original {} language to the target language 5XeflW1ZJc {}. ", + "transfer_ori_acc": 0.23981220411098694, + "transfer_atk_acc": 0.19533148180136461 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.24, + "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}.", + "atk_acc": 0.23, + "atk_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to MQnugHcaoy {}. ", + "transfer_ori_acc": 0.236844571841243, + "transfer_atk_acc": 0.22997780997943829 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.35, + "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}.", + "atk_acc": 0.34, + "atk_prompt": "Perform a machine translation for the provided sentence, changing it from {} to Kw6nrs57gH {}. ", + "transfer_ori_acc": 0.3589520956246615, + "transfer_atk_acc": 0.3536481319882283 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.35, + "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}.", + "atk_acc": 0.31, + "atk_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language 5XeflW1ZJc {}. ", + "transfer_ori_acc": 0.35395992027011125, + "transfer_atk_acc": 0.35277034079178615 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 23.12, + "origin_prompt": "Calculate the answer to the following math query about {}.", + "atk_acc": 19.38, + "atk_prompt": "Calculate the answer to the following math query about 5XeflW1ZJc {}. ", + "transfer_ori_acc": 16.25, + "transfer_atk_acc": 15.625 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 22.5, + "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}.", + "atk_acc": 21.25, + "atk_prompt": "As a mathematics instructor, calculate the answer to the following problem related to AMsRIKZniY {}. ", + "transfer_ori_acc": 15.625, + "transfer_atk_acc": 15.0 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_google-flan-ul2/deepwordbug_0_shot.json b/results_transfer/chatgpt_google-flan-ul2/deepwordbug_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..bd59c8202df31f9ff47de6061a0258f7ffbef2d5 --- /dev/null +++ b/results_transfer/chatgpt_google-flan-ul2/deepwordbug_0_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 79.0, + "origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable':", + "atk_acc": 74.5, + "atk_prompt": "Review the sentece below and identify whether its grammar is 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 86.0, + "transfer_atk_acc": 86.3 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 79.5, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 0.0, + "atk_prompt": "nhile performing grammr analysis, classify the rgammar of the fllowing sentenZe as 'acceptable' for corFect Xrammar or 'unacceptable' for incorrect gramUar:", + "transfer_ori_acc": 86.0, + "transfer_atk_acc": 84.39999999999999 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 74.0, + "origin_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 70.0, + "atk_prompt": "Evaluate whether the two provRided sentnces convey the same meaning by answering 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 86.27450980392157, + "transfer_atk_acc": 84.06862745098039 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 76.0, + "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.", + "atk_acc": 53.0, + "atk_prompt": "nI your capacity as a sematnic asseJssment tol, evaluate the prvoided senRtences and classify their meainngs as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 83.33333333333334, + "transfer_atk_acc": 82.84313725490196 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 78.0, + "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 50.0, + "atk_prompt": "Exaluate whetxher the iven contet Supports the anwser to the queston by respEonding with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 93.89999999999999, + "transfer_atk_acc": 94.39999999999999 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 79.5, + "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.", + "atk_acc": 19.5, + "atk_prompt": "As a textula inference expert, aInalyze if the answer to the question can be deducud from the provided context and Melect 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 93.30000000000001, + "transfer_atk_acc": 93.2 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 82.0, + "origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 45.0, + "atk_prompt": "Assess whethdr the followng sLtatements are identocal in meuaning by answerinJ 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.7, + "transfer_atk_acc": 87.6 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 82.5, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 79.5, + "atk_prompt": "Ao an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions. ", + "transfer_ori_acc": 88.3, + "transfer_atk_acc": 87.9 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 83.5, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 79.0, + "atk_prompt": "ArYe the folowing two sentenes entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word. ", + "transfer_ori_acc": 94.22382671480143, + "transfer_atk_acc": 94.22382671480143 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 80.5, + "origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 78.5, + "atk_prompt": "IAs an instrument for entailment evaluatioRn, consider the otwo sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 93.50180505415162, + "transfer_atk_acc": 93.86281588447653 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 93.0, + "origin_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative':", + "atk_acc": 88.5, + "atk_prompt": "Deetrmine the overall sentLment of this sentence, categorizing it as 'positive' or 'negative': ", + "transfer_ori_acc": 96.67431192660551, + "transfer_atk_acc": 96.55963302752293 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 93.0, + "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:", + "atk_acc": 85.0, + "atk_prompt": "uFnctioning as a sentmient identification tool, assess if the folGowing expression is 'positive' or 'negative'. Classify: ", + "transfer_ori_acc": 95.9862385321101, + "transfer_atk_acc": 95.52752293577981 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 66.2, + "origin_prompt": "Examine the pair of sentences and determine if they exhibit entailment or not_entailment. Answer with either 'entailment' or 'not_entailment'.", + "atk_acc": 53.52, + "atk_prompt": "Examine the Bair of snetences and determine if they exhibit entailment or not_entailment. Ancwer with eMther 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 78.87323943661971, + "transfer_atk_acc": 77.46478873239437 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 63.38, + "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':", + "atk_acc": 53.52, + "atk_prompt": "IQn the role of a semantic relationship analyzer, eamine the connection between the given sentences and decide if they exhbit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 77.46478873239437, + "transfer_atk_acc": 77.46478873239437 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 62.5, + "origin_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D'.", + "atk_acc": 60.0, + "atk_prompt": "Regarding the following multiple-choice question on {}, pick the correct answeWr from the options 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 52.304964539007095, + "transfer_atk_acc": 52.4822695035461 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 62.5, + "origin_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 59.0, + "atk_prompt": "With your knowledge of {}, tackle the follQwing multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 52.12765957446809, + "transfer_atk_acc": 53.191489361702125 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 71.87, + "origin_prompt": "Discover the best answer based on the context. If the context doesn't include an answer, respond with 'unanswerable'.", + "atk_acc": 56.61, + "atk_prompt": "DiscoveMr the best answr bassd on the Rontext. Iof the cnotext doesn't include an answer, resCpond with 'unanswerable'.", + "transfer_ori_acc": 89.38229949874687, + "transfer_atk_acc": 90.38229949874687 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 64.5, + "origin_prompt": "As an expert with a deep understanding of the context, find the best answer. If the context doesn't include an answer, say 'unanswerable'.", + "atk_acc": 36.52, + "atk_prompt": "zs an exprt with a deep understanding of the context, find the best answr. If the context doesn't include an anCswer, asy 'unanswerable'.", + "transfer_ori_acc": 89.44479949874687, + "transfer_atk_acc": 89.64896616541355 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.22, + "origin_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context:", + "atk_acc": 0.21, + "atk_prompt": "Translate the provideMd sentence from {} to {} while mainOaining the original meaning and context:", + "transfer_ori_acc": 0.1697518407093131, + "transfer_atk_acc": 0.1715931236169142 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.22, + "origin_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to {}:", + "atk_acc": 0.21, + "atk_prompt": "In the capacity of a proficient tanslator, plnase change the following senlence from {} to {}: ", + "transfer_ori_acc": 0.17223574608131062, + "transfer_atk_acc": 0.17294967708177186 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.34, + "origin_prompt": "Translate the following text from the source language {} to the target language {}:", + "atk_acc": 0.33, + "atk_prompt": "Translate the following text from the source language {} to the Ptarget langugae {}:", + "transfer_ori_acc": 0.30893486445367974, + "transfer_atk_acc": 0.3118511970354398 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.34, + "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}:", + "atk_acc": 0.32, + "atk_prompt": "In the role of a language expert, pErform a macRhine yranslation for the provided sentence, cZanging it from {} to {}: ", + "transfer_ori_acc": 0.31256243540663714, + "transfer_atk_acc": 0.31148261863602605 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 6.88, + "origin_prompt": "Resolve the following mathematical question associated with {}:", + "atk_acc": 5.62, + "atk_prompt": "Resolve the following mathematical quXestion assocated with {}:", + "transfer_ori_acc": 14.374999999999998, + "transfer_atk_acc": 14.374999999999998 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 5.0, + "origin_prompt": "Embodying a mathematics advisor, find the answer to the following math problem with {}:", + "atk_acc": 2.5, + "atk_prompt": "Embodying a mathematics advisor, ind the anwer to the fpollowing math problem with {}: ", + "transfer_ori_acc": 13.125, + "transfer_atk_acc": 11.875 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_google-flan-ul2/deepwordbug_3_shot.json b/results_transfer/chatgpt_google-flan-ul2/deepwordbug_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..23aed068bb56b12bb6da092a48a603921789c0a6 --- /dev/null +++ b/results_transfer/chatgpt_google-flan-ul2/deepwordbug_3_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 80.5, + "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable'.", + "atk_acc": 78.5, + "atk_prompt": "Examine the given sentence and decide if it is gramatically sound. nAswer with either 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 86.2, + "transfer_atk_acc": 86.3 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 81.5, + "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable'.", + "atk_acc": 78.5, + "atk_prompt": "Funtioning as a graMmar evaluation tool, analyzY the given sentence and decide if it is grammatically corLect, responding with 'acceptable' or 'unacceptable'. ", + "transfer_ori_acc": 86.4, + "transfer_atk_acc": 87.1 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 74.5, + "origin_prompt": "Examine the pair of sentences and decide if their meanings are identical by answering with 'equivalent' or 'not_equivalent'.", + "atk_acc": 65.5, + "atk_prompt": "Examine the Kair of sentences and edcide if their meanijngs are ibentical by answering with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 85.5392156862745, + "transfer_atk_acc": 85.29411764705883 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 72.0, + "origin_prompt": "As a linguistic comparator, review the following pair of sentences and determine their semantic equivalence by choosing 'equivalent' or 'not_equivalent'.", + "atk_acc": 66.5, + "atk_prompt": "AV a linguistic comaprator, review the following pair of sentences and determine their semayntic equivalence by chhoosing 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 85.5392156862745, + "transfer_atk_acc": 85.7843137254902 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 85.0, + "origin_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 77.5, + "atk_prompt": "Examine the contedt and question, and determine if the cXontext logically implies the answer by selecting 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 93.5, + "transfer_atk_acc": 93.89999999999999 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 86.0, + "origin_prompt": "As a textual analyst, examine if the given context logically implies the answer to the question and indicate your decision with 'entailment' or 'not_entailment'.", + "atk_acc": 75.0, + "atk_prompt": "A a textual analyst, xamine if the gPven conSext logiclaly implies the asnwer to the qbuestion and indicate your decision with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 93.2, + "transfer_atk_acc": 93.60000000000001 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 82.0, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 80.0, + "atk_prompt": "Can these two statements be considJred equal in meaning? Answer with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.6, + "transfer_atk_acc": 87.8 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 83.0, + "origin_prompt": "Acting as a question equivalence instrument, determine if the provided questions are equivalent in meaning, answering with 'equivalent' for similar questions or 'not_equivalent' for dissimilar ones.", + "atk_acc": 81.5, + "atk_prompt": "Actinm as a question equivalence instrumetn, determine if the provided questions are equivalent in meaning, answering with 'equivalent' for similar questions or 'not_equivalent' for dissimilar ones. ", + "transfer_ori_acc": 88.6, + "transfer_atk_acc": 88.4 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 85.0, + "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 80.5, + "atk_prompt": "Does the relationshBp between the given sentences repersent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 93.86281588447653, + "transfer_atk_acc": 93.50180505415162 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.5, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 81.0, + "atk_prompt": "In your role as an entailment analysis toIol, aZssess the relationship between the given sentences and cassify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 92.4187725631769, + "transfer_atk_acc": 93.14079422382672 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 97.0, + "origin_prompt": "Evaluate the sentiment of the given text and classify it as 'positive' or 'negative'.", + "atk_acc": 94.0, + "atk_prompt": "ENvaluate the senttment of the giOven text and classify it as 'positive' or 'negative'. ", + "transfer_ori_acc": 96.78899082568807, + "transfer_atk_acc": 96.67431192660551 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 97.0, + "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'.", + "atk_acc": 94.0, + "atk_prompt": "qAs a setniment classifier, determnne whether the following text is 'positive' or 'negative'. ", + "transfer_ori_acc": 96.67431192660551, + "transfer_atk_acc": 96.67431192660551 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 67.61, + "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 64.79, + "atk_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 76.05633802816901, + "transfer_atk_acc": 74.64788732394366 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 64.79, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 59.15, + "atk_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 77.46478873239437, + "transfer_atk_acc": 74.64788732394366 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 66.5, + "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 63.5, + "atk_prompt": "ISn ralation to the mulitple-choice question on {}, pleare provide the accurate answer by choosing 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 53.54609929078015, + "transfer_atk_acc": 52.836879432624116 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 65.5, + "origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 62.5, + "atk_prompt": "As someone well-versed in {}, please address the mulZtiple-choice question below by selecting 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 53.01418439716312, + "transfer_atk_acc": 53.36879432624113 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 73.25, + "origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, reply with 'unanswerable'.", + "atk_acc": 67.18, + "atk_prompt": "Based on the goiven context, provide the best possible answer. IL there's no answer available in the coYtext, reply with 'unanswerable'.", + "transfer_ori_acc": 90.04896616541353, + "transfer_atk_acc": 90.54896616541353 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 72.68, + "origin_prompt": "As a subject matter expert, extract the correct answer from the context. If an answer is not present, indicate 'unanswerable'.", + "atk_acc": 66.77, + "atk_prompt": "s a subject matter eAxpert, extract the correc answer from the context. If an answer is not presnt, idicate 'unanswerable'.", + "transfer_ori_acc": 89.54896616541353, + "transfer_atk_acc": 90.04896616541353 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.23, + "origin_prompt": "Given the sentence below, perform a machine translation from {} to {}.", + "atk_acc": 0.23, + "atk_prompt": "Given the sentence below, perform a mRachine traCslation from {} to {}. ", + "transfer_ori_acc": 0.23710409171247415, + "transfer_atk_acc": 0.23480177292913945 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.24, + "origin_prompt": "Assuming the responsibilities of a professional translator, translate the subsequent text passage from {} to {}.", + "atk_acc": 0.23, + "atk_prompt": "Assuming the responsibilities of a professional transaltor, translate the subsequent text passage from {} to {}. ", + "transfer_ori_acc": 0.2676477143562027, + "transfer_atk_acc": 0.23356148320550005 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.35, + "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}.", + "atk_acc": 0.26, + "atk_prompt": "Plese erform a translation of the Fiven sentence, converwing it from {} to {}. ", + "transfer_ori_acc": 0.35614481480027904, + "transfer_atk_acc": 0.35436612463392214 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.35, + "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}.", + "atk_acc": 0.35, + "atk_prompt": "As a qualified translator, convert the given sentence from its original {} language to the targte language {}. ", + "transfer_ori_acc": 0.35395992027011125, + "transfer_atk_acc": 0.3516955546172276 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 23.12, + "origin_prompt": "Calculate the answer to the following math query about {}.", + "atk_acc": 18.75, + "atk_prompt": "Calculate the ansewr to the followiJg math query about {}. ", + "transfer_ori_acc": 16.25, + "transfer_atk_acc": 16.875 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 23.12, + "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}.", + "atk_acc": 20.62, + "atk_prompt": "As a mathematics insrtuctor, calculate the answer to the following problem Vrelated to {}. ", + "transfer_ori_acc": 15.625, + "transfer_atk_acc": 15.0 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_google-flan-ul2/stresstest_0_shot.json b/results_transfer/chatgpt_google-flan-ul2/stresstest_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..d99863e3b0c5ab0f4f5ea34bcf23b7eb72797458 --- /dev/null +++ b/results_transfer/chatgpt_google-flan-ul2/stresstest_0_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 78.5, + "origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable':", + "atk_acc": 32.5, + "atk_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable' and true is true :", + "transfer_ori_acc": 86.0, + "transfer_atk_acc": 84.7 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 79.5, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 75.0, + "atk_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar and true is true and true is true and true is true and true is true and true is true :", + "transfer_ori_acc": 86.0, + "transfer_atk_acc": 35.0 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 77.5, + "origin_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent'.", + "atk_acc": 73.0, + "atk_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent' and false is not true . ", + "transfer_ori_acc": 87.00980392156863, + "transfer_atk_acc": 85.5392156862745 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 76.0, + "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.", + "atk_acc": 15.5, + "atk_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent' and true is true . ", + "transfer_ori_acc": 83.33333333333334, + "transfer_atk_acc": 82.84313725490196 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 78.5, + "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 74.5, + "atk_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment' and true is true . ", + "transfer_ori_acc": 93.89999999999999, + "transfer_atk_acc": 93.4 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 79.5, + "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.", + "atk_acc": 69.0, + "atk_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 93.30000000000001, + "transfer_atk_acc": 92.7 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 80.0, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 72.5, + "atk_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent' and false is not true . ", + "transfer_ori_acc": 87.4, + "transfer_atk_acc": 86.8 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 81.0, + "origin_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 80.5, + "atk_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 88.0, + "transfer_atk_acc": 88.3 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 83.5, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 81.5, + "atk_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word and true is true . ", + "transfer_ori_acc": 94.22382671480143, + "transfer_atk_acc": 92.4187725631769 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 81.5, + "origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 80.5, + "atk_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true :", + "transfer_ori_acc": 93.50180505415162, + "transfer_atk_acc": 92.4187725631769 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 91.0, + "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment:", + "atk_acc": 0.0, + "atk_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment and true is true : ", + "transfer_ori_acc": 96.55963302752293, + "transfer_atk_acc": 96.21559633027523 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 93.5, + "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:", + "atk_acc": 93.0, + "atk_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify and false is not true : ", + "transfer_ori_acc": 95.9862385321101, + "transfer_atk_acc": 91.39908256880734 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 61.97, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 63.38, + "atk_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word and true is true . ", + "transfer_ori_acc": 78.87323943661971, + "transfer_atk_acc": 77.46478873239437 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 63.38, + "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':", + "atk_acc": 59.15, + "atk_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true :", + "transfer_ori_acc": 77.46478873239437, + "transfer_atk_acc": 77.46478873239437 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 63.0, + "origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 53.5, + "atk_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D' and true is true . ", + "transfer_ori_acc": 51.77304964539007, + "transfer_atk_acc": 52.12765957446809 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 61.5, + "origin_prompt": "As a skilled person in the field of {}, please respond to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 57.0, + "atk_prompt": "As a skilled person in the field of {}, please respond to the multiple-choice question by choosing 'A', 'B', 'C', or 'D' and false is not true .", + "transfer_ori_acc": 52.4822695035461, + "transfer_atk_acc": 53.191489361702125 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 69.3, + "origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, respond with 'unanswerable'.", + "atk_acc": 69.88, + "atk_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, respond with 'unanswerable' and true is true .", + "transfer_ori_acc": 89.38229949874687, + "transfer_atk_acc": 88.81979949874687 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 64.4, + "origin_prompt": "As an expert with a deep understanding of the context, find the best answer. If the context doesn't include an answer, say 'unanswerable'.", + "atk_acc": 64.49, + "atk_prompt": "As an expert with a deep understanding of the context, find the best answer. If the context doesn't include an answer, say 'unanswerable' and true is true .", + "transfer_ori_acc": 89.44479949874687, + "transfer_atk_acc": 87.81979949874687 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.22, + "origin_prompt": "Using machine translation, convert the given sentence from {} into the {} language:", + "atk_acc": 0.19, + "atk_prompt": "Using machine translation, convert the given sentence from {} into the {} language and false is not true :", + "transfer_ori_acc": 0.17087237648129655, + "transfer_atk_acc": 0.16269515463504752 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.22, + "origin_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to {}:", + "atk_acc": 0.21, + "atk_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to and true is true and true is true and true is true and true is true and true is true {}: ", + "transfer_ori_acc": 0.17223574608131062, + "transfer_atk_acc": 0.14731943322573832 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.34, + "origin_prompt": "Translate the subsequent text passage from its original {} language to the {} language:", + "atk_acc": 0.29, + "atk_prompt": "Translate the subsequent text passage from its original {} language to the {} language and false is not true :", + "transfer_ori_acc": 0.31391205501691966, + "transfer_atk_acc": 0.29524149996109 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.34, + "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}:", + "atk_acc": 0.31, + "atk_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to and false is not true {}: ", + "transfer_ori_acc": 0.31256243540663714, + "transfer_atk_acc": 0.30881978704578533 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 6.25, + "origin_prompt": "Resolve the following mathematical question associated with {}:", + "atk_acc": 5.62, + "atk_prompt": "Resolve the following mathematical question associated with and true is true {}:", + "transfer_ori_acc": 14.374999999999998, + "transfer_atk_acc": 14.374999999999998 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 5.0, + "origin_prompt": "Embodying a mathematics advisor, find the answer to the following math problem with {}:", + "atk_acc": 3.12, + "atk_prompt": "Embodying a mathematics advisor, find the answer to the following math problem with and true is true and true is true and true is true and true is true and true is true {}: ", + "transfer_ori_acc": 13.125, + "transfer_atk_acc": 13.750000000000002 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_google-flan-ul2/stresstest_3_shot.json b/results_transfer/chatgpt_google-flan-ul2/stresstest_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..21fdf63a1b078fc9d5702d473e719c64dff26cde --- /dev/null +++ b/results_transfer/chatgpt_google-flan-ul2/stresstest_3_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 80.5, + "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable'.", + "atk_acc": 79.5, + "atk_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 86.2, + "transfer_atk_acc": 85.9 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 81.5, + "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable'.", + "atk_acc": 80.5, + "atk_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable' and false is not true . ", + "transfer_ori_acc": 86.4, + "transfer_atk_acc": 86.4 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 73.5, + "origin_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 71.5, + "atk_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 84.06862745098039, + "transfer_atk_acc": 83.82352941176471 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 72.5, + "origin_prompt": "In the role of a textual comparison specialist, examine the given sentences and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 70.0, + "atk_prompt": "In the role of a textual comparison specialist, examine the given sentences and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent' and true is true . ", + "transfer_ori_acc": 87.00980392156863, + "transfer_atk_acc": 86.27450980392157 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 85.0, + "origin_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 79.5, + "atk_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 93.5, + "transfer_atk_acc": 93.89999999999999 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 84.0, + "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", + "atk_acc": 81.5, + "atk_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment' and false is not true . ", + "transfer_ori_acc": 94.0, + "transfer_atk_acc": 94.3 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 81.5, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 81.0, + "atk_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent' and false is not true . ", + "transfer_ori_acc": 87.6, + "transfer_atk_acc": 87.6 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 83.0, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 82.0, + "atk_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 89.0, + "transfer_atk_acc": 89.3 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 85.0, + "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 82.0, + "atk_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment' and false is not true . ", + "transfer_ori_acc": 93.86281588447653, + "transfer_atk_acc": 93.14079422382672 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 83.0, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 83.0, + "atk_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment' and false is not true . ", + "transfer_ori_acc": 92.7797833935018, + "transfer_atk_acc": 92.7797833935018 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 97.0, + "origin_prompt": "Evaluate the sentiment of the given text and classify it as 'positive' or 'negative'.", + "atk_acc": 95.5, + "atk_prompt": "Evaluate the sentiment of the given text and classify it as 'positive' or 'negative' and true is true . ", + "transfer_ori_acc": 96.78899082568807, + "transfer_atk_acc": 96.78899082568807 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 97.0, + "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'.", + "atk_acc": 96.0, + "atk_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative' and false is not true . ", + "transfer_ori_acc": 96.67431192660551, + "transfer_atk_acc": 96.44495412844036 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 67.61, + "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 60.56, + "atk_prompt": "Indicate if the connection between the later sentences is 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 76.05633802816901, + "transfer_atk_acc": 76.05633802816901 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 59.15, + "atk_prompt": "In the role of a semantic relationship analyzer, investigated the linking between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 76.05633802816901, + "transfer_atk_acc": 74.64788732394366 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 66.5, + "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.", + "atk_acc": 61.5, + "atk_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D' and false is not true . ", + "transfer_ori_acc": 53.36879432624113, + "transfer_atk_acc": 51.95035460992907 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 65.5, + "origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 59.5, + "atk_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D' and false is not true .", + "transfer_ori_acc": 53.01418439716312, + "transfer_atk_acc": 51.95035460992907 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 73.99, + "origin_prompt": "Please derive the most fitting answer from the context. If there isn't an answer in the context, use 'unanswerable'.", + "atk_acc": 61.58, + "atk_prompt": "Please derive the most fitting answer from the context. If there isn't an answer in the context, use 'unanswerable' and false is not true .", + "transfer_ori_acc": 90.54896616541353, + "transfer_atk_acc": 91.04896616541353 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 72.94, + "origin_prompt": "Being well-versed in the context, please derive the most fitting answer. If there isn't an answer in the context, use 'unanswerable'.", + "atk_acc": 62.85, + "atk_prompt": "Being well-versed in the context, please derive the most fitting answer. If there isn't an answer in the context, use 'unanswerable' and false is not true .", + "transfer_ori_acc": 91.04896616541353, + "transfer_atk_acc": 90.54896616541353 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.24, + "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}.", + "atk_acc": 0.23, + "atk_prompt": "Please perform a translation of the given sentence, converting it from {} to and true is true and true is true and true is true and true is true and true is true {}. ", + "transfer_ori_acc": 0.23770260910322083, + "transfer_atk_acc": 0.19504983833847192 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.24, + "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}.", + "atk_acc": 0.23, + "atk_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to and false is not true {}. ", + "transfer_ori_acc": 0.236844571841243, + "transfer_atk_acc": 0.23378656190205807 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.35, + "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}.", + "atk_acc": 0.34, + "atk_prompt": "Perform a machine translation for the provided sentence, changing it from {} to and false is not true {}. ", + "transfer_ori_acc": 0.3589520956246615, + "transfer_atk_acc": 0.35262904846027593 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.35, + "origin_prompt": "Acting as a professional translator, translate the provided sentence from {} to {}.", + "atk_acc": 0.35, + "atk_prompt": "Acting as a professional translator, translate the provided sentence from {} to and true is true {}. ", + "transfer_ori_acc": 0.35878153272684565, + "transfer_atk_acc": 0.3528990956235115 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 23.12, + "origin_prompt": "Resolve the following mathematical question associated with {}.", + "atk_acc": 20.0, + "atk_prompt": "Resolve the following mathematical question associated with and true is true {}. ", + "transfer_ori_acc": 16.25, + "transfer_atk_acc": 16.25 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 22.5, + "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}.", + "atk_acc": 20.62, + "atk_prompt": "As a mathematics instructor, calculate the answer to the following problem related to and false is not true {}. ", + "transfer_ori_acc": 15.625, + "transfer_atk_acc": 11.875 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_google-flan-ul2/textbugger_0_shot.json b/results_transfer/chatgpt_google-flan-ul2/textbugger_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..5dc54062c7f9ef4d45096132fe1035931850117e --- /dev/null +++ b/results_transfer/chatgpt_google-flan-ul2/textbugger_0_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 77.5, + "origin_prompt": "Assess the grammatical structure of the given sentence and classify it as 'Acceptable' or 'Unacceptable':", + "atk_acc": 6.5, + "atk_prompt": "As sess the garmmatical structure of the conferred sentence and classify it as 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 85.39999999999999, + "transfer_atk_acc": 85.39999999999999 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 79.5, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 9.5, + "atk_prompt": "While prforming grammar analyzing, classify the grammar of the follows sentеnce as 'acceptable' for correcting gammar or 'unacceptable' for incorrect ɡrammar:", + "transfer_ori_acc": 86.0, + "transfer_atk_acc": 85.9 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 75.0, + "origin_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 67.5, + "atk_prompt": "Evaluate wհether the both provided sentences convey the same meaninɡ by answeriոg 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 86.27450980392157, + "transfer_atk_acc": 87.00980392156863 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 75.0, + "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.", + "atk_acc": 9.0, + "atk_prompt": "In your capacities as a semɑntic assessment tool, evaluate the provdied penalties and classify their meaning as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 83.33333333333334, + "transfer_atk_acc": 84.06862745098039 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 77.0, + "origin_prompt": "Consider the context and question, and indicate if the answer can be logically deduced from the context by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 57.0, + "atk_prompt": "Considеr the context and question, and idicate if the respond can be logically ԁeduced from the context by ⲅesponding with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 93.7, + "transfer_atk_acc": 93.89999999999999 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 78.5, + "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.", + "atk_acc": 20.0, + "atk_prompt": "Because a textual inference expert, analysing if the answeⲅ to the question can be deduced from the providеd contex𝚝 and choosing 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 93.30000000000001, + "transfer_atk_acc": 93.10000000000001 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 81.5, + "origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 44.5, + "atk_prompt": "Assess whether the following statements are analogous in maning by answeⲅing 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.7, + "transfer_atk_acc": 88.0 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 83.0, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 77.5, + "atk_prompt": "As an instrument for problem comparison evɑluation, consider the ques𝚝ions and determine if their meaning is the same, respond with 'equivalent' for similar questiоns or 'not_equivalent' for different questions. ", + "transfer_ori_acc": 88.3, + "transfer_atk_acc": 88.5 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 83.0, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 78.5, + "atk_prompt": "Be the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word. ", + "transfer_ori_acc": 94.22382671480143, + "transfer_atk_acc": 94.22382671480143 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 81.0, + "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 74.5, + "atk_prompt": "Working as an entailment classifier, detecting if the given torque of sentences displays entailment or not_entailment. Answering with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 94.58483754512635, + "transfer_atk_acc": 93.86281588447653 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 90.0, + "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment:", + "atk_acc": 82.5, + "atk_prompt": "Review this speeches and decide if it has a 'positive' or 'negative' sentiment: ", + "transfer_ori_acc": 96.55963302752293, + "transfer_atk_acc": 96.10091743119266 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 93.5, + "origin_prompt": "Acting as a sentiment evaluator, identify if the given sentence is 'positive' or 'negative'. Classify:", + "atk_acc": 86.0, + "atk_prompt": "Atcing as a sentiemnt evalua𝚝or, detecting if the given sentence is 'positive' or 'negative'. Classify: ", + "transfer_ori_acc": 96.21559633027523, + "transfer_atk_acc": 95.9862385321101 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 67.61, + "origin_prompt": "Examine the pair of sentences and determine if they exhibit entailment or not_entailment. Answer with either 'entailment' or 'not_entailment'.", + "atk_acc": 52.11, + "atk_prompt": "Examine the paiⲅ of sentences and determines if they exhibit entailment or not_entailment. Answer with neither 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 78.87323943661971, + "transfer_atk_acc": 80.28169014084507 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 60.56, + "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 52.11, + "atk_prompt": "Working as an entailment classifier, detects whether the given pair of sentences displays entailment or not_entailment. Answering with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 80.28169014084507, + "transfer_atk_acc": 78.87323943661971 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 62.5, + "origin_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.0, + "atk_prompt": "Regarding the hereafter multiple-choice question on {}, piϲk the corrects answers from the options 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 52.304964539007095, + "transfer_atk_acc": 52.836879432624116 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 62.5, + "origin_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 57.5, + "atk_prompt": "Witհ your knowledge of {}, confront the following multipⅼe-choice question by select 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 52.12765957446809, + "transfer_atk_acc": 54.25531914893617 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 69.27, + "origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, respond with 'unanswerable'.", + "atk_acc": 49.17, + "atk_prompt": "Based on the given ambient, provide the nicest achievable answer. If there's no answer avaiⅼable in the context, respond with 'unanswerable'.", + "transfer_ori_acc": 89.38229949874687, + "transfer_atk_acc": 89.38229949874687 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 64.85, + "origin_prompt": "As an expert with a deep understanding of the context, find the best answer. If the context doesn't include an answer, say 'unanswerable'.", + "atk_acc": 14.05, + "atk_prompt": "As an expert with a deepest understanding of the context, found the nicest respond. Though the context doesn't incⅼude an answers, telling 'unanswerable'.", + "transfer_ori_acc": 89.44479949874687, + "transfer_atk_acc": 89.20836682222799 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.22, + "origin_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context:", + "atk_acc": 0.19, + "atk_prompt": "Translate the provied punishments from {} to {} while maintaining the original meaning and context:", + "transfer_ori_acc": 0.1697518407093131, + "transfer_atk_acc": 0.17468116580134835 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.22, + "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}:", + "atk_acc": 0.03, + "atk_prompt": "Pick on the role of a certified translation, fulfilling a translation of the yielded sentence from {} to {}: ", + "transfer_ori_acc": 0.1727120569025014, + "transfer_atk_acc": 0.1751877743047567 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.34, + "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}:", + "atk_acc": 0.32, + "atk_prompt": "Perform a typewriters translation for the provided sentence, altered it from {} to {}:", + "transfer_ori_acc": 0.3102455120610509, + "transfer_atk_acc": 0.31389089309916657 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.34, + "origin_prompt": "While acting as an experienced language expert, translate the following text from {} to {}:", + "atk_acc": 0.32, + "atk_prompt": "Whilst acting as an experienced language expert, translate the following laws from {} to {}: ", + "transfer_ori_acc": 0.3133010160127483, + "transfer_atk_acc": 0.31174571065315043 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 6.25, + "origin_prompt": "Resolve the following mathematical question associated with {}:", + "atk_acc": 3.75, + "atk_prompt": "Solving the following mathematical question associɑted with {}:", + "transfer_ori_acc": 14.374999999999998, + "transfer_atk_acc": 14.374999999999998 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 5.0, + "origin_prompt": "In the position of a mathematics coach, figure out the answer to this math challenge involving {}:", + "atk_acc": 1.25, + "atk_prompt": "In the poste of a mathematical coach, figure out the replying to this math challenge involving {}: ", + "transfer_ori_acc": 10.625, + "transfer_atk_acc": 14.374999999999998 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_google-flan-ul2/textbugger_3_shot.json b/results_transfer/chatgpt_google-flan-ul2/textbugger_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..dc0e596a36c519495a79607c0c99d191fa7bbc63 --- /dev/null +++ b/results_transfer/chatgpt_google-flan-ul2/textbugger_3_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 80.5, + "origin_prompt": "Analyze the provided sentence and classify its grammatical correctness as 'Acceptable' or 'Unacceptable'.", + "atk_acc": 67.5, + "atk_prompt": "Analysing the provided sentences and classify its pronouns correct as 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 86.0, + "transfer_atk_acc": 84.0 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 81.0, + "origin_prompt": "In the role of a syntax analyzer, examine the grammar of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones.", + "atk_acc": 77.5, + "atk_prompt": "In the role of a syntax checker, examine the grammɑr of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentence or 'unacceptable' for incorrect ones. ", + "transfer_ori_acc": 86.5, + "transfer_atk_acc": 86.8 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 73.0, + "origin_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 68.0, + "atk_prompt": "Ass ess if the two gien sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 84.06862745098039, + "transfer_atk_acc": 84.06862745098039 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 71.5, + "origin_prompt": "As a linguistic comparator, review the following pair of sentences and determine their semantic equivalence by choosing 'equivalent' or 'not_equivalent'.", + "atk_acc": 64.5, + "atk_prompt": "As a linguistic comparator, review the following torque of sentences and determine their semantiϲ eԛuivalence by choosing 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 85.5392156862745, + "transfer_atk_acc": 86.27450980392157 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 85.0, + "origin_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 76.0, + "atk_prompt": "Examine the cntext and question, and determine if the backdrop logically implies the answer by selecting 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 93.5, + "transfer_atk_acc": 93.60000000000001 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 83.0, + "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", + "atk_acc": 73.5, + "atk_prompt": "Aѕ a semantic researcher, evaluate si the gave framework supports the respond to the question and choose 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 94.0, + "transfer_atk_acc": 93.7 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 82.5, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 80.0, + "atk_prompt": "Can these two st atements be considered equal in meaning? Ansԝer with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.6, + "transfer_atk_acc": 88.5 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 82.5, + "origin_prompt": "In the capacity of a question assessment system, indicate if the meaning of the provided questions is the same, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 80.5, + "atk_prompt": "In the abilities of a question ɑssessment system, indicate if the meaning of the provided matters is the same, responding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 88.7, + "transfer_atk_acc": 88.6 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 84.0, + "origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.", + "atk_acc": 76.5, + "atk_prompt": "Review the two declarations and categorize their relationship as neither 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 92.7797833935018, + "transfer_atk_acc": 92.7797833935018 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 83.5, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 80.0, + "atk_prompt": "As an entailment identification system, eamine the connection between the following sentences and reѕpond with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 92.7797833935018, + "transfer_atk_acc": 92.7797833935018 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 95.5, + "origin_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative'.", + "atk_acc": 74.0, + "atk_prompt": "Ater examining the following expression, etiquette its emotion as nor 'positive' or 'negative'. ", + "transfer_ori_acc": 92.77522935779817, + "transfer_atk_acc": 95.52752293577981 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 97.5, + "origin_prompt": "In the capacity of a sentiment classifier, decide whether the given quote is 'positive' or 'negative'.", + "atk_acc": 93.5, + "atk_prompt": "Onto the capacity of a feeling classifeir, decide si the given quote is 'positive' or 'negative'. ", + "transfer_ori_acc": 96.44495412844036, + "transfer_atk_acc": 96.21559633027523 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 69.01, + "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 61.97, + "atk_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment' Vq3aBzuZcD. ", + "transfer_ori_acc": 76.05633802816901, + "transfer_atk_acc": 76.05633802816901 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 59.15, + "atk_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment' is77sOXAu8. ", + "transfer_ori_acc": 77.46478873239437, + "transfer_atk_acc": 76.05633802816901 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 67.5, + "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 61.5, + "atk_prompt": "At relationship to the mul𝚝iple-choice question on {}, please proviԁe the accurate responding by choosing 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 53.54609929078015, + "transfer_atk_acc": 53.54609929078015 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 65.5, + "origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 61.5, + "atk_prompt": "As someone well-vesred in {}, please address the muⅼtiple-choice questioո below by selecting 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 53.01418439716312, + "transfer_atk_acc": 53.01418439716312 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 72.75, + "origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, reply with 'unanswerable'.", + "atk_acc": 67.93, + "atk_prompt": "Based on the giveո context, provide the best pоssible answer. If there's no answer available in the contex𝚝, reply with 'unanswerable'.", + "transfer_ori_acc": 90.04896616541353, + "transfer_atk_acc": 90.04896616541353 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 70.63, + "origin_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable'.", + "atk_acc": 68.31, + "atk_prompt": "As a profcient expert in the given cоntext, search for the most relevant answer. If the anwser cannot be unearthed, respond by saying 'unanswerable'.", + "transfer_ori_acc": 90.54896616541353, + "transfer_atk_acc": 90.54896616541353 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.23, + "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}.", + "atk_acc": 0.23, + "atk_prompt": "Pⅼease perform a translating of the givеn sentence, conversion it from {} to {}. ", + "transfer_ori_acc": 0.23770260910322083, + "transfer_atk_acc": 0.2378477137621768 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.24, + "origin_prompt": "Assuming the responsibilities of a professional translator, translate the subsequent text passage from {} to {}.", + "atk_acc": 0.23, + "atk_prompt": "Assuminɡ the responsibilities of a professional transla𝚝or, translate the subsequent text passage from {} to {}. ", + "transfer_ori_acc": 0.2676477143562027, + "transfer_atk_acc": 0.26838348206750196 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.35, + "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}.", + "atk_acc": 0.35, + "atk_prompt": "Plеase perform a translation of the given sentence, convert it from {} to {}. ", + "transfer_ori_acc": 0.35614481480027904, + "transfer_atk_acc": 0.3540588157620361 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.35, + "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}.", + "atk_acc": 0.34, + "atk_prompt": "As a eligible translator, convert the given sentence from its orіginal {} language to the tarɡet language {}. ", + "transfer_ori_acc": 0.35395992027011125, + "transfer_atk_acc": 0.35407154067545005 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 23.12, + "origin_prompt": "Calculate the answer to the following math query about {}.", + "atk_acc": 20.62, + "atk_prompt": "Calculate the answer to the 𝚏ollowing math query about {}. ", + "transfer_ori_acc": 16.25, + "transfer_atk_acc": 16.25 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 22.5, + "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}.", + "atk_acc": 19.38, + "atk_prompt": "Since a mathematics istructor, calculate the answer to the following problem related to {}. ", + "transfer_ori_acc": 15.625, + "transfer_atk_acc": 15.0 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_google-flan-ul2/textfooler_0_shot.json b/results_transfer/chatgpt_google-flan-ul2/textfooler_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..6509c1a412eed3ef28e54bd0aa9cf34a1927a767 --- /dev/null +++ b/results_transfer/chatgpt_google-flan-ul2/textfooler_0_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 79.0, + "origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable':", + "atk_acc": 2.0, + "atk_prompt": "Revising the punishments below and locate whether its dictionary is 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 86.0, + "transfer_atk_acc": 78.10000000000001 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 80.0, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 6.0, + "atk_prompt": "While performing jargon discusses, categories the language of the below sentence as 'acceptable' for mend jargon or 'unacceptable' for unfit grammar:", + "transfer_ori_acc": 86.0, + "transfer_atk_acc": 85.1 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 75.0, + "origin_prompt": "Examine the pair of sentences and decide if their meanings are identical by answering with 'equivalent' or 'not_equivalent'.", + "atk_acc": 68.5, + "atk_prompt": "Examine the pair of sentences and selects if their regard are identical by responding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 86.02941176470588, + "transfer_atk_acc": 85.7843137254902 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 75.5, + "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.0, + "atk_prompt": "About your expertise as a semantic appraisal means, evaluate the provided penalty and classify their meanings as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 83.33333333333334, + "transfer_atk_acc": 81.61764705882352 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 77.5, + "origin_prompt": "Consider the context and question, and indicate if the answer can be logically deduced from the context by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 0.5, + "atk_prompt": "Recital the context and items, and specify if the solution can be logically subtracted from the framework by reactions with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 93.7, + "transfer_atk_acc": 58.4 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 78.0, + "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "As a semantic fellows, measuring whether the rendered background supports the answers to the item and chosen 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 94.69999999999999, + "transfer_atk_acc": 94.0 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 80.5, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.0, + "atk_prompt": "Kan these two statement is studied same in sens? Reaction with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.4, + "transfer_atk_acc": 88.6 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 80.5, + "origin_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 76.5, + "atk_prompt": "Functioning as a question similarity evaluation tool, analyse the given questions and decide if they share the same feel, responding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 88.0, + "transfer_atk_acc": 88.9 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 83.5, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 60.5, + "atk_prompt": "Pose the aftermath two phrases entailment or not_entailment? Reactions me with \"entailment\" or \"not_entailment\", just one adjectives. ", + "transfer_ori_acc": 94.22382671480143, + "transfer_atk_acc": 92.4187725631769 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 81.0, + "origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 60.0, + "atk_prompt": "Because an devices for entailment appraised, revisit the two phrases and identifying if their correlate is 'entailment' or 'not_entailment'. Answered with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 93.50180505415162, + "transfer_atk_acc": 91.69675090252709 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 90.5, + "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment:", + "atk_acc": 0.0, + "atk_prompt": "Analysis this assertion and defining whether it ai a 'positive' or 'negative' sentiment: ", + "transfer_ori_acc": 96.55963302752293, + "transfer_atk_acc": 96.10091743119266 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 94.0, + "origin_prompt": "Emulating a sentiment classification system, indicate whether the provided text is 'positive' or 'negative'. Classify:", + "atk_acc": 4.5, + "atk_prompt": "Emulating a foreboding classifies regime, explain whether the stipulated laws is 'positive' or 'negative'. Grading: ", + "transfer_ori_acc": 95.87155963302753, + "transfer_atk_acc": 95.52752293577981 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 69.01, + "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 52.11, + "atk_prompt": "Identify whether the given pair of sentence exposes entailment or not_entailment. Retaliated with 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 78.87323943661971, + "transfer_atk_acc": 83.09859154929578 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 67.61, + "origin_prompt": "Acting as an entailment detection instrument, determine if the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment':", + "atk_acc": 46.48, + "atk_prompt": "Acting as an entailment detection instrument, determine if the given torque of sentences depicting entailment or not_entailment. Reactions with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 78.87323943661971, + "transfer_atk_acc": 78.87323943661971 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 64.0, + "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 50.5, + "atk_prompt": "In relation to the multiple-choice issue on {}, please delivering the clarification rebuttal by selector 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 51.95035460992907, + "transfer_atk_acc": 53.54609929078015 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 61.5, + "origin_prompt": "As a skilled person in the field of {}, please respond to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 54.5, + "atk_prompt": "As a expert person in the minefields of {}, please reacts to the multiple-choice question by choice 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 52.4822695035461, + "transfer_atk_acc": 53.90070921985816 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 70.01, + "origin_prompt": "Based on the given context, provide the best possible answer. If there's no answer available in the context, respond with 'unanswerable'.", + "atk_acc": 5.68, + "atk_prompt": "Based on the received backgrounds, provide the nicest unlikely respond. Although there's no riposte available in the context, reactions with 'unanswerable'.", + "transfer_ori_acc": 89.38229949874687, + "transfer_atk_acc": 64.65591060985798 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 69.17, + "origin_prompt": "Using your knowledge of the context, identify the best answer to the question. If the context doesn't provide an answer, write 'unanswerable'.", + "atk_acc": 12.54, + "atk_prompt": "Using your skill of the background, specify the improved rebuttal to the question. While the context doesn't delivering an reactions, writings 'unanswerable'.", + "transfer_ori_acc": 88.31979949874687, + "transfer_atk_acc": 83.85692382078497 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.22, + "origin_prompt": "Using machine translation, convert the given sentence from {} into the {} language:", + "atk_acc": 0.0, + "atk_prompt": "Using typewriter translation, transformation the given sentence from {} into the {} pronunciation:", + "transfer_ori_acc": 0.17087237648129655, + "transfer_atk_acc": 0.18145247530896158 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.22, + "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}:", + "atk_acc": 0.0, + "atk_prompt": "Select on the features of a certifiable translators, conducting a translation of the gave sentencing from {} to {}: ", + "transfer_ori_acc": 0.1727120569025014, + "transfer_atk_acc": 0.17242972008557814 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.34, + "origin_prompt": "Translate the subsequent text passage from its original {} language to the {} language:", + "atk_acc": 0.07, + "atk_prompt": "Translate the sequential version passage from its primitive {} grammar to the {} pronunciation:", + "transfer_ori_acc": 0.31391205501691966, + "transfer_atk_acc": 0.29980040349878595 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.34, + "origin_prompt": "While acting as an experienced language expert, translate the following text from {} to {}:", + "atk_acc": 0.27, + "atk_prompt": "While acting as an suffered dialect expert, translate the below laws from {} to {}: ", + "transfer_ori_acc": 0.3133010160127483, + "transfer_atk_acc": 0.30695283823442066 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 6.88, + "origin_prompt": "Obtain the result for this math question regarding {}:", + "atk_acc": 0.0, + "atk_prompt": "Obtain the achievements for this math question respecting {}:", + "transfer_ori_acc": 13.750000000000002, + "transfer_atk_acc": 13.750000000000002 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 5.0, + "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}:", + "atk_acc": 0.0, + "atk_prompt": "As a mathematics instructor, calculate the rebuttal to the following problem related to {}: ", + "transfer_ori_acc": 15.0, + "transfer_atk_acc": 8.125 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_google-flan-ul2/textfooler_3_shot.json b/results_transfer/chatgpt_google-flan-ul2/textfooler_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..66d788006a409f05d4cadf0c41618f4e61ec096f --- /dev/null +++ b/results_transfer/chatgpt_google-flan-ul2/textfooler_3_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 80.5, + "origin_prompt": "Examine the sentence and decide if its grammar is 'Acceptable' or 'Unacceptable'.", + "atk_acc": 39.5, + "atk_prompt": "Exams the sentence and defining if its jargon is 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 85.9, + "transfer_atk_acc": 83.1 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 81.0, + "origin_prompt": "In the role of a syntax analyzer, examine the grammar of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones.", + "atk_acc": 76.0, + "atk_prompt": "Across the role of a syntax analyzer, examine the dictionary of the provided chastisement and decide if it is adequate, answered with 'acceptable' for grammatically correct sentences or 'unacceptable' for deceptive ones. ", + "transfer_ori_acc": 86.5, + "transfer_atk_acc": 87.0 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 73.0, + "origin_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 59.0, + "atk_prompt": "Appraisal if the two accorded sentences enjoys equivalent connotation by choice 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 84.06862745098039, + "transfer_atk_acc": 85.04901960784314 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 71.5, + "origin_prompt": "In the role of a textual comparison specialist, examine the given sentences and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 57.0, + "atk_prompt": "De the role of a textual comparisons scholar, revisited the given sentences and decide if they exchange the same feel, responding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.00980392156863, + "transfer_atk_acc": 86.02941176470588 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 84.5, + "origin_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 63.5, + "atk_prompt": "Discuss the background and question, and determine if the framework intuitively presupposes the answer by akhtar 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 93.5, + "transfer_atk_acc": 93.7 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 85.5, + "origin_prompt": "As a textual analyst, examine if the given context logically implies the answer to the question and indicate your decision with 'entailment' or 'not_entailment'.", + "atk_acc": 68.5, + "atk_prompt": "Iike a textual analyst, analyze if the submitted regard rationally implies the replies to the issuing and indicates your clout with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 93.2, + "transfer_atk_acc": 93.0 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 83.5, + "origin_prompt": "Determine if the given pair of statements can be considered the same by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 80.0, + "atk_prompt": "Determine if the given pair of representations can constitute felt the same by responding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 88.8, + "transfer_atk_acc": 88.8 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 83.0, + "origin_prompt": "Acting as a question equivalence instrument, determine if the provided questions are equivalent in meaning, answering with 'equivalent' for similar questions or 'not_equivalent' for dissimilar ones.", + "atk_acc": 80.0, + "atk_prompt": "Interim as a issues equivalence instrument, determine if the provided aspects are equivalent in meaning, answering with 'equivalent' for selfsame questions or 'not_equivalent' for dissimilar ones. ", + "transfer_ori_acc": 88.6, + "transfer_atk_acc": 88.4 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 84.5, + "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 79.5, + "atk_prompt": "Do the coitus between the given sentences comprised entailment or not_entailment? Meet with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 93.86281588447653, + "transfer_atk_acc": 93.14079422382672 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 83.0, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 79.5, + "atk_prompt": "As an entailment identification system, revisited the connection between the following sentences and respondent with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 92.7797833935018, + "transfer_atk_acc": 92.7797833935018 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.0, + "origin_prompt": "Read the provided excerpt and choose between 'positive' and 'negative' to describe its sentiment.", + "atk_acc": 85.0, + "atk_prompt": "Lu the rendered excerpt and withheld between 'positive' and 'negative' to describe its foreboding. ", + "transfer_ori_acc": 96.67431192660551, + "transfer_atk_acc": 96.78899082568807 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 97.5, + "origin_prompt": "Emulating a sentiment classification system, indicate whether the provided text is 'positive' or 'negative'.", + "atk_acc": 91.5, + "atk_prompt": "Emulating a ambience classification mechanisms, describe whether the presented writings is 'positive' or 'negative'. ", + "transfer_ori_acc": 96.55963302752293, + "transfer_atk_acc": 96.67431192660551 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 69.01, + "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 59.15, + "atk_prompt": "Indicate if the connection between the subsequently sentences is 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 76.05633802816901, + "transfer_atk_acc": 76.05633802816901 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 57.75, + "atk_prompt": "In the role of a semantic contacts analyzer, analysed the connecting between the given sentences and choose if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 76.05633802816901, + "transfer_atk_acc": 74.64788732394366 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 67.0, + "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.", + "atk_acc": 60.0, + "atk_prompt": "Asking reactions to the multiple-choice question about {} by selecting the appropriate reacts: 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 53.36879432624113, + "transfer_atk_acc": 52.659574468085104 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 66.0, + "origin_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 61.5, + "atk_prompt": "From your acquaintances of {}, addressing the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 53.72340425531915, + "transfer_atk_acc": 53.36879432624113 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 72.8, + "origin_prompt": "Using the context, determine the most suitable answer. If the context doesn't contain the answer, write 'unanswerable'.", + "atk_acc": 63.67, + "atk_prompt": "Using the context, identifies the most suitable answer. Although the context doesn't comprise the meet, editor 'unanswerable'.", + "transfer_ori_acc": 90.04896616541353, + "transfer_atk_acc": 90.48646616541353 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 71.93, + "origin_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable'.", + "atk_acc": 64.13, + "atk_prompt": "As a knowledge expert in the given context, find for the most relevant answering. Unless the answer cannot exist found, respond by told 'unanswerable'.", + "transfer_ori_acc": 90.54896616541353, + "transfer_atk_acc": 90.38229949874687 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.23, + "origin_prompt": "Given the sentence below, perform a machine translation from {} to {}.", + "atk_acc": 0.18, + "atk_prompt": "Attributed the sentence below, meet a equipments interpreters from {} to {}. ", + "transfer_ori_acc": 0.23710409171247415, + "transfer_atk_acc": 0.21328830771164164 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.24, + "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}.", + "atk_acc": 0.23, + "atk_prompt": "Of the features of a language expert, performed a machine translation for the presented sentence, variation it from {} to {}. ", + "transfer_ori_acc": 0.236844571841243, + "transfer_atk_acc": 0.23842933704937957 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.35, + "origin_prompt": "Translate the following text from the source language {} to the target language {}.", + "atk_acc": 0.33, + "atk_prompt": "Translate the following texts from the source lingo {} to the millennium phrasing {}. ", + "transfer_ori_acc": 0.35343571054545636, + "transfer_atk_acc": 0.3525503139881663 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.35, + "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}.", + "atk_acc": 0.34, + "atk_prompt": "As a nuanced translation, metamorphosis the gave sentence from its original {} terminology to the aspirations grammar {}. ", + "transfer_ori_acc": 0.35395992027011125, + "transfer_atk_acc": 0.35131474429213844 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 23.75, + "origin_prompt": "Compute the result of this math task concerning {}.", + "atk_acc": 1.88, + "atk_prompt": "Estimate the accomplishments of this math task concerning {}. ", + "transfer_ori_acc": 16.25, + "transfer_atk_acc": 15.625 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 22.5, + "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}.", + "atk_acc": 1.88, + "atk_prompt": "As a computations instructor, derive the rebuttal to the following hassles links to {}. ", + "transfer_ori_acc": 15.625, + "transfer_atk_acc": 14.374999999999998 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_google-flan-ul2/translation_0_shot.json b/results_transfer/chatgpt_google-flan-ul2/translation_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..927464ac325895c5449f2007db7b9a969f199945 --- /dev/null +++ b/results_transfer/chatgpt_google-flan-ul2/translation_0_shot.json @@ -0,0 +1,122 @@ +[ + { + "dataset": "cola", + "type": "translation", + "origin_acc": 79.5, + "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable':", + "atk_acc": 25.0, + "atk_prompt": "Classify the grammar of a given sentence into two categories, 'acceptable' or 'unacceptable', and judge its accuracy.", + "transfer_ori_acc": 85.9, + "transfer_atk_acc": 84.8 + }, + { + "dataset": "mrpc", + "type": "translation", + "origin_acc": 76.5, + "origin_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent'.", + "atk_acc": 7.0, + "atk_prompt": "As an expert in semantic comparison, assess the similarity between the following two sentences and determine whether they are 'equivalent' or 'not_equivalent'.", + "transfer_ori_acc": 87.00980392156863, + "transfer_atk_acc": 88.97058823529412 + }, + { + "dataset": "qnli", + "type": "translation", + "origin_acc": 79.5, + "origin_prompt": "Assess if the answer to the question can be logically concluded from the provided context by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 56.5, + "atk_prompt": "Check the text link to the question and answer 'entailment' or 'not_entailment', depending on your language skills.", + "transfer_ori_acc": 94.0, + "transfer_atk_acc": 93.8 + }, + { + "dataset": "qqp", + "type": "translation", + "origin_acc": 84.0, + "origin_prompt": "Determine if the given pair of statements can be considered the same by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 3.0, + "atk_prompt": "As a tool for determining the equivalence of problems, the problems are examined and their similarity is classified as 'equivalent' or 'not_equivalent'.", + "transfer_ori_acc": 88.1, + "transfer_atk_acc": 87.3 + }, + { + "dataset": "rte", + "type": "translation", + "origin_acc": 84.0, + "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 39.0, + "atk_prompt": "Determine whether the given sentences have a cause-effect relationship as an engagement analysis tool and categorize them as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 93.50180505415162, + "transfer_atk_acc": 92.05776173285199 + }, + { + "dataset": "sst2", + "type": "translation", + "origin_acc": 96.0, + "origin_prompt": "Read the provided excerpt and choose between 'positive' and 'negative' to describe its sentiment:", + "atk_acc": 29.5, + "atk_prompt": "In the context of textual analysis, what classification is this phrase between 'positive' and 'negative'?", + "transfer_ori_acc": 96.44495412844036, + "transfer_atk_acc": 96.3302752293578 + }, + { + "dataset": "wnli", + "type": "translation", + "origin_acc": 67.61, + "origin_prompt": "Examine the pair of sentences and determine if they exhibit entailment or not_entailment. Answer with either 'entailment' or 'not_entailment'.", + "atk_acc": 26.76, + "atk_prompt": "Using implication analysis, determine whether the given sentences have a cause-effect relationship and categorize them as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 78.87323943661971, + "transfer_atk_acc": 76.05633802816901 + }, + { + "dataset": "mmlu", + "type": "translation", + "origin_acc": 64.0, + "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 49.5, + "atk_prompt": "Choose the correct answer to the multiple-choice question below. Is '{}' an 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 51.95035460992907, + "transfer_atk_acc": 36.52482269503546 + }, + { + "dataset": "squad_v2", + "type": "translation", + "origin_acc": 72.37, + "origin_prompt": "Discover the best answer based on the context. If the context doesn't include an answer, respond with 'unanswerable'.", + "atk_acc": 46.08, + "atk_prompt": "Please tell me what your question is about. If there is no context in which you can provide an answer, respond with 'unanswerable'.", + "transfer_ori_acc": 89.38229949874687, + "transfer_atk_acc": 90.04896616541353 + }, + { + "dataset": "iwslt", + "type": "translation", + "origin_acc": 0.22, + "origin_prompt": "Using machine translation, convert the given sentence from {} into the {} language:", + "atk_acc": 0.11, + "atk_prompt": "Please convert the sentence below to {}, and then translate it to {},", + "transfer_ori_acc": 0.17087237648129655, + "transfer_atk_acc": 0.16364025086187875 + }, + { + "dataset": "un_multi", + "type": "translation", + "origin_acc": 0.34, + "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}:", + "atk_acc": 0.16, + "atk_prompt": "Please convert the sentence below to {}, and then translate it to {},", + "transfer_ori_acc": 0.3102455120610509, + "transfer_atk_acc": 0.3111101089578496 + }, + { + "dataset": "math", + "type": "translation", + "origin_acc": 6.88, + "origin_prompt": "Calculate the answer to the following math query about {}:", + "atk_acc": 3.75, + "atk_prompt": "Use {} to solve the following mathematical problem.", + "transfer_ori_acc": 14.374999999999998, + "transfer_atk_acc": 13.750000000000002 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_google-flan-ul2/translation_3_shot.json b/results_transfer/chatgpt_google-flan-ul2/translation_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..c4e8ce28a19f8e5c07f63eaf9ece0c82cfb8b757 --- /dev/null +++ b/results_transfer/chatgpt_google-flan-ul2/translation_3_shot.json @@ -0,0 +1,122 @@ +[ + { + "dataset": "cola", + "type": "translation", + "origin_acc": 81.5, + "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable'.", + "atk_acc": 68.5, + "atk_prompt": "Classify the grammar of a given sentence as 'acceptable' or 'unacceptable', and judge its accuracy.", + "transfer_ori_acc": 86.4, + "transfer_atk_acc": 86.0 + }, + { + "dataset": "mrpc", + "type": "translation", + "origin_acc": 74.5, + "origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 65.0, + "atk_prompt": "Based on my experience in semantic analysis, classify the following two sentences between 'equivalent' or 'not_equivalent'.", + "transfer_ori_acc": 86.27450980392157, + "transfer_atk_acc": 85.04901960784314 + }, + { + "dataset": "qnli", + "type": "translation", + "origin_acc": 86.0, + "origin_prompt": "As a textual analyst, examine if the given context logically implies the answer to the question and indicate your decision with 'entailment' or 'not_entailment'.", + "atk_acc": 71.5, + "atk_prompt": "Check the text link to the question and answer 'entailment' or 'not_entailment', depending on your language skills.", + "transfer_ori_acc": 93.2, + "transfer_atk_acc": 93.0 + }, + { + "dataset": "qqp", + "type": "translation", + "origin_acc": 84.0, + "origin_prompt": "In the capacity of a question assessment system, indicate if the meaning of the provided questions is the same, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 80.5, + "atk_prompt": "By analyzing the similarity of these problems, they are divided into categories of 'equivalent' or 'not_equivalent'.", + "transfer_ori_acc": 88.7, + "transfer_atk_acc": 89.60000000000001 + }, + { + "dataset": "rte", + "type": "translation", + "origin_acc": 84.5, + "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 80.0, + "atk_prompt": "Determine whether the given sentences have a cause-effect relationship as an engagement analysis tool and categorize them as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 93.86281588447653, + "transfer_atk_acc": 92.4187725631769 + }, + { + "dataset": "sst2", + "type": "translation", + "origin_acc": 97.5, + "origin_prompt": "In the capacity of a sentiment classifier, decide whether the given quote is 'positive' or 'negative'.", + "atk_acc": 68.5, + "atk_prompt": "In the context of textual analysis, what classification is this phrase between 'positive' and 'negative'?", + "transfer_ori_acc": 96.44495412844036, + "transfer_atk_acc": 96.78899082568807 + }, + { + "dataset": "wnli", + "type": "translation", + "origin_acc": 74.65, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 59.15, + "atk_prompt": "Determinate the semantic connections of a given sentence and classify it as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 76.05633802816901, + "transfer_atk_acc": 76.05633802816901 + }, + { + "dataset": "mmlu", + "type": "translation", + "origin_acc": 67.0, + "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.", + "atk_acc": 63.0, + "atk_prompt": "Rate your understanding of the multiple-choice question on {}. Choose the correct answer from options 'A', 'B', 'C' or 'D'.", + "transfer_ori_acc": 53.36879432624113, + "transfer_atk_acc": 53.72340425531915 + }, + { + "dataset": "squad_v2", + "type": "translation", + "origin_acc": 74.58, + "origin_prompt": "Please derive the most fitting answer from the context. If there isn't an answer in the context, use 'unanswerable'.", + "atk_acc": 67.46, + "atk_prompt": "Please provide additional information about the facts mentioned in this sentence. If no information is available, respond with 'unanswerable'.", + "transfer_ori_acc": 90.54896616541353, + "transfer_atk_acc": 90.88229949874687 + }, + { + "dataset": "iwslt", + "type": "translation", + "origin_acc": 0.24, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.", + "atk_acc": 0.17, + "atk_prompt": "Please translate the sentences below into {}, then {}.", + "transfer_ori_acc": 0.23981220411098694, + "transfer_atk_acc": 0.23367230863817542 + }, + { + "dataset": "un_multi", + "type": "translation", + "origin_acc": 0.36, + "origin_prompt": "Translate the following text from the source language {} to the target language {}.", + "atk_acc": 0.25, + "atk_prompt": "Please translate the following sentences into {}, then {}.", + "transfer_ori_acc": 0.35343571054545636, + "transfer_atk_acc": 0.3537894516935946 + }, + { + "dataset": "math", + "type": "translation", + "origin_acc": 23.12, + "origin_prompt": "Compute the result of this math task concerning {}.", + "atk_acc": 18.75, + "atk_prompt": "Please calculate the answers related to {}.", + "transfer_ori_acc": 16.25, + "transfer_atk_acc": 16.875 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_vicuna-13b/bertattack_0_shot.json b/results_transfer/chatgpt_vicuna-13b/bertattack_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..e2c51799dd061db7d6b1b9ac0fbac378cda51c13 --- /dev/null +++ b/results_transfer/chatgpt_vicuna-13b/bertattack_0_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 78.5, + "origin_prompt": "Assess the grammatical structure of the given sentence and classify it as 'Acceptable' or 'Unacceptable':", + "atk_acc": 0.0, + "atk_prompt": "what the grammatical structure of the given sentence and consider it as 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 50.5, + "transfer_atk_acc": 38.3 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 80.0, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 0.0, + "atk_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for so grammar or 'unacceptable' for abnormal grammar:", + "transfer_ori_acc": 27.0, + "transfer_atk_acc": 30.7 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 75.5, + "origin_prompt": "Examine the pair of sentences and decide if their meanings are identical by answering with 'equivalent' or 'not_equivalent'.", + "atk_acc": 56.0, + "atk_prompt": "Examine the pairs of pronouns and evaluate if their meanings are different by answering with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 64.2156862745098, + "transfer_atk_acc": 28.186274509803923 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 75.5, + "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.0, + "atk_prompt": "In your capacity as a semantic assessment tool, evaluate the provided tense and classify their constituents as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 48.03921568627451, + "transfer_atk_acc": 36.029411764705884 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 79.0, + "origin_prompt": "Assess if the answer to the question can be logically concluded from the provided context by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "and if the answer to the question can be logically concluded from the provided context by avoiding 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 19.7, + "transfer_atk_acc": 2.5 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 79.5, + "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "As a textual inference expert, analysis if the answer to the question can be deduced from the provided citation and define 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 32.5, + "transfer_atk_acc": 12.9 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 79.5, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 1.0, + "atk_prompt": "Can these two constructions be considered equal in meaning? object with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 22.900000000000002, + "transfer_atk_acc": 2.3 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 82.5, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 0.0, + "atk_prompt": "give an table for category comparison evaluation, consider the questions and determine if their concept is the same, responding with 'equivalent' for similarity questions or 'not_equivalent' for different knowledge. ", + "transfer_ori_acc": 26.200000000000003, + "transfer_atk_acc": 6.2 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 82.5, + "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 55.5, + "atk_prompt": "determine if the following singular of sentence indicating entailment or not_entailment. contrast with 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 41.15523465703971, + "transfer_atk_acc": 0.36101083032490977 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 81.5, + "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 37.5, + "atk_prompt": "at as an entailment classifier, know which the given type of verb displays entailment or not_entailment. object with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 49.81949458483754, + "transfer_atk_acc": 33.2129963898917 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 90.0, + "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment:", + "atk_acc": 0.0, + "atk_prompt": "what this assertion and are whether it has a 'positive' or 'negative' sentiment: ", + "transfer_ori_acc": 42.54587155963303, + "transfer_atk_acc": 27.75229357798165 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 93.0, + "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:", + "atk_acc": 76.0, + "atk_prompt": "from as a sentiments identification tool, assess if the same word is 'positive' or 'negative'. function: ", + "transfer_ori_acc": 38.646788990825684, + "transfer_atk_acc": 19.495412844036696 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 69.01, + "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 47.89, + "atk_prompt": "Identify whether the given individual of verbs demonstrates entailment or not_entailment. have with 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 33.80281690140845, + "transfer_atk_acc": 2.8169014084507045 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 60.56, + "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 47.89, + "atk_prompt": "Working as an entailment classifier, identify whether the given corpus of verbs displays entailment or not_entailment. act with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 43.66197183098591, + "transfer_atk_acc": 19.718309859154928 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 63.5, + "origin_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D'.", + "atk_acc": 14.5, + "atk_prompt": "1 the first multiple-choice question on {}, remove the following answer from the options 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 18.439716312056735, + "transfer_atk_acc": 31.914893617021278 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 62.0, + "origin_prompt": "Drawing upon your mastery of {}, please answer the multiple-choice question by selecting the correct option from 'A', 'B', 'C', or 'D'.", + "atk_acc": 1.0, + "atk_prompt": "Drawing upon your mastery of {}, simply question the multiple-selecting question by selecting the wrong color from 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 22.340425531914892, + "transfer_atk_acc": 17.19858156028369 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_vicuna-13b/bertattack_3_shot.json b/results_transfer/chatgpt_vicuna-13b/bertattack_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..2fd20e98af143d123b8ae01ed660055747ea9016 --- /dev/null +++ b/results_transfer/chatgpt_vicuna-13b/bertattack_3_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 81.0, + "origin_prompt": "Analyze the provided sentence and classify its grammatical correctness as 'Acceptable' or 'Unacceptable'.", + "atk_acc": 59.0, + "atk_prompt": "Analyze the reported speech and classify its inclusive correcting as 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 10.9, + "transfer_atk_acc": 28.299999999999997 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 81.0, + "origin_prompt": "In the role of a syntax analyzer, examine the grammar of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones.", + "atk_acc": 75.5, + "atk_prompt": "using the art of a level analyzeg, examine the grammar of the provided sentence and decide if it is genuine, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones. ", + "transfer_ori_acc": 3.4000000000000004, + "transfer_atk_acc": 14.899999999999999 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 74.0, + "origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 47.5, + "atk_prompt": "Compare the following sentences and determine if they lack the same aspect marking by respectively with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 62.254901960784316, + "transfer_atk_acc": 58.82352941176471 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 71.5, + "origin_prompt": "In the role of a textual comparison specialist, examine the given sentences and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 47.0, + "atk_prompt": "have the role of a textual comparison academic, examine the given sentences and determine if they share the same length, responding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 69.36274509803921, + "transfer_atk_acc": 67.15686274509804 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 84.0, + "origin_prompt": "Consider the context and question, and indicate if the answer can be logically deduced from the context by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 71.0, + "atk_prompt": "put the contexts and question, and be if the answer can be logically deduced from the question by stating with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 27.500000000000004, + "transfer_atk_acc": 11.3 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 84.0, + "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.", + "atk_acc": 70.0, + "atk_prompt": "As a structural inference expert, analyze if the answer to the challenge can be deduced from the query audience and select 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 40.300000000000004, + "transfer_atk_acc": 32.6 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 82.5, + "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", + "atk_acc": 78.5, + "atk_prompt": "Are the following versus answers equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\". ", + "transfer_ori_acc": 34.8, + "transfer_atk_acc": 34.5 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 83.5, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 78.0, + "atk_prompt": "and an instrument for question comparison analysis, consider the questions and establish if their topic is the same, responding with 'equivalent' for matching responses or 'not_equivalent' for between questions. ", + "transfer_ori_acc": 35.3, + "transfer_atk_acc": 28.599999999999998 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 85.0, + "origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.", + "atk_acc": 59.5, + "atk_prompt": "write the second statements and categorize their relationship as neither 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 50.54151624548736, + "transfer_atk_acc": 42.59927797833935 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 83.5, + "origin_prompt": "While performing entailment analysis, classify the relationship between the provided sentences as 'entailment' or 'not_entailment'.", + "atk_acc": 79.0, + "atk_prompt": "was performing entailment analysis, classify the relationship between the first phrase as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 32.49097472924188, + "transfer_atk_acc": 38.26714801444043 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 95.5, + "origin_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative'.", + "atk_acc": 94.5, + "atk_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative' Q47Jmd4lMV. ", + "transfer_ori_acc": 7.912844036697248, + "transfer_atk_acc": 0.11467889908256881 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 97.5, + "origin_prompt": "In the capacity of a sentiment classifier, decide whether the given quote is 'positive' or 'negative'.", + "atk_acc": 96.5, + "atk_prompt": "In the capacity of a sentiment classifier, decide whether the given quote is 'positive' or 'negative' 9Q3S1F94fE. ", + "transfer_ori_acc": 20.18348623853211, + "transfer_atk_acc": 20.18348623853211 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 69.01, + "origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.", + "atk_acc": 59.15, + "atk_prompt": "examine the paired actions and categorize their relationship as is 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 16.901408450704224, + "transfer_atk_acc": 8.450704225352112 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 56.34, + "atk_prompt": "In the role of a semantic relationship analyzer, define the connection between the given links and decide if they display entailment or not_entailment. Answer with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 5.633802816901409, + "transfer_atk_acc": 15.492957746478872 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 66.5, + "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.", + "atk_acc": 11.5, + "atk_prompt": "at respond to the multiple-independent question about {} by selecting the inappropriate question: 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 8.865248226950355, + "transfer_atk_acc": 12.411347517730496 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 65.5, + "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 52.5, + "atk_prompt": "all your ability with {}, attend to the following multiple-select question by picking 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 8.156028368794328, + "transfer_atk_acc": 7.2695035460992905 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_vicuna-13b/checklist_0_shot.json b/results_transfer/chatgpt_vicuna-13b/checklist_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..aa475410eb859c90a701338cf8ad0bcf0c9c59df --- /dev/null +++ b/results_transfer/chatgpt_vicuna-13b/checklist_0_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 78.5, + "origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable':", + "atk_acc": 72.5, + "atk_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable' zexKUTWJYG:", + "transfer_ori_acc": 28.9, + "transfer_atk_acc": 22.7 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 79.0, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 45.0, + "atk_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar zexKUTWJYG:", + "transfer_ori_acc": 27.0, + "transfer_atk_acc": 38.6 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 75.5, + "origin_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 72.0, + "atk_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent' SvAp8RlOFn. ", + "transfer_ori_acc": 48.77450980392157, + "transfer_atk_acc": 0.49019607843137253 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 75.5, + "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.", + "atk_acc": 73.5, + "atk_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent' Kw6nrs57gH. ", + "transfer_ori_acc": 48.03921568627451, + "transfer_atk_acc": 56.372549019607845 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 78.0, + "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 75.5, + "atk_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment' ItGDrrA1Sc. ", + "transfer_ori_acc": 26.5, + "transfer_atk_acc": 16.5 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 78.0, + "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", + "atk_acc": 74.5, + "atk_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment' 6a4Yn3RGVc. ", + "transfer_ori_acc": 13.4, + "transfer_atk_acc": 11.200000000000001 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 81.5, + "origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 78.0, + "atk_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent' iia2yL9Uzm. ", + "transfer_ori_acc": 30.2, + "transfer_atk_acc": 26.900000000000002 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 83.0, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 80.5, + "atk_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions Cev0s49fbe. ", + "transfer_ori_acc": 26.200000000000003, + "transfer_atk_acc": 24.4 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 82.5, + "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 80.0, + "atk_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment' MSAddJ4D2a.", + "transfer_ori_acc": 41.15523465703971, + "transfer_atk_acc": 20.577617328519857 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 81.5, + "origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 80.0, + "atk_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment' 6a4Yn3RGVc:", + "transfer_ori_acc": 24.90974729241877, + "transfer_atk_acc": 6.859205776173286 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 93.0, + "origin_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative':", + "atk_acc": 90.0, + "atk_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative' AMsRIKZniY: ", + "transfer_ori_acc": 14.3348623853211, + "transfer_atk_acc": 6.8807339449541285 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 92.5, + "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:", + "atk_acc": 92.0, + "atk_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify Cev0s49fbe: ", + "transfer_ori_acc": 38.646788990825684, + "transfer_atk_acc": 33.48623853211009 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 64.79, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 59.15, + "atk_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word zexKUTWJYG. ", + "transfer_ori_acc": 25.352112676056336, + "transfer_atk_acc": 2.8169014084507045 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 63.38, + "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':", + "atk_acc": 57.75, + "atk_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment' 5XeflW1ZJc:", + "transfer_ori_acc": 43.66197183098591, + "transfer_atk_acc": 29.577464788732392 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 64.0, + "origin_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D'.", + "atk_acc": 58.0, + "atk_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D' Pbl3tYuoRo. ", + "transfer_ori_acc": 18.439716312056735, + "transfer_atk_acc": 23.22695035460993 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 61.5, + "origin_prompt": "As a skilled person in the field of {}, please respond to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 58.0, + "atk_prompt": "As a skilled person in the field of {}, please respond to the multiple-choice question by choosing 'A', 'B', 'C', or 'D' IhuBIhoPGc.", + "transfer_ori_acc": 20.0354609929078, + "transfer_atk_acc": 18.085106382978726 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_vicuna-13b/checklist_3_shot.json b/results_transfer/chatgpt_vicuna-13b/checklist_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..cf2b8d2c468dc67d37be4682fd5f06014cc15af9 --- /dev/null +++ b/results_transfer/chatgpt_vicuna-13b/checklist_3_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 81.0, + "origin_prompt": "Examine the sentence and decide if its grammar is 'Acceptable' or 'Unacceptable'.", + "atk_acc": 78.0, + "atk_prompt": "Examine the sentence and decide if its grammar is 'Acceptable' or 'Unacceptable' 9Q3S1F94fE. ", + "transfer_ori_acc": 41.8, + "transfer_atk_acc": 40.8 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 81.5, + "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable'.", + "atk_acc": 79.5, + "atk_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable' 6a4Yn3RGVc. ", + "transfer_ori_acc": 23.9, + "transfer_atk_acc": 13.4 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 74.0, + "origin_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 68.5, + "atk_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent' LkF0FZxMZ4. ", + "transfer_ori_acc": 69.36274509803921, + "transfer_atk_acc": 64.2156862745098 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 72.0, + "origin_prompt": "As a linguistic comparator, review the following pair of sentences and determine their semantic equivalence by choosing 'equivalent' or 'not_equivalent'.", + "atk_acc": 68.0, + "atk_prompt": "As a linguistic comparator, review the following pair of sentences and determine their semantic equivalence by choosing 'equivalent' or 'not_equivalent' bhrRSokrfa. ", + "transfer_ori_acc": 61.51960784313726, + "transfer_atk_acc": 65.68627450980392 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 84.5, + "origin_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 80.5, + "atk_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment' 9Q3S1F94fE. ", + "transfer_ori_acc": 26.900000000000002, + "transfer_atk_acc": 10.0 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 84.5, + "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.", + "atk_acc": 81.0, + "atk_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment' YHQiDfFhw4. ", + "transfer_ori_acc": 40.300000000000004, + "transfer_atk_acc": 15.1 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 82.0, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 79.0, + "atk_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent' xikCjkMydH. ", + "transfer_ori_acc": 36.3, + "transfer_atk_acc": 28.1 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 83.0, + "origin_prompt": "In the capacity of a question assessment system, indicate if the meaning of the provided questions is the same, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 81.5, + "atk_prompt": "In the capacity of a question assessment system, indicate if the meaning of the provided questions is the same, responding with 'equivalent' or 'not_equivalent' 5XeflW1ZJc. ", + "transfer_ori_acc": 46.9, + "transfer_atk_acc": 31.4 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 84.5, + "origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.", + "atk_acc": 80.5, + "atk_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment' 5XeflW1ZJc. ", + "transfer_ori_acc": 50.54151624548736, + "transfer_atk_acc": 48.375451263537904 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.0, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 80.5, + "atk_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment' bhrRSokrfa. ", + "transfer_ori_acc": 46.931407942238266, + "transfer_atk_acc": 45.48736462093863 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.0, + "origin_prompt": "Read the provided excerpt and choose between 'positive' and 'negative' to describe its sentiment.", + "atk_acc": 77.5, + "atk_prompt": "go the read excerpt and be between 'positive' and 'negative' to describing its mood. ", + "transfer_ori_acc": 1.261467889908257, + "transfer_atk_acc": 2.637614678899083 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 97.5, + "origin_prompt": "In the capacity of a sentiment classifier, decide whether the given quote is 'positive' or 'negative'.", + "atk_acc": 13.5, + "atk_prompt": "in the capacity of a sentiment classizer, chose neither the said quotes is 'positive' or 'negative'. ", + "transfer_ori_acc": 20.18348623853211, + "transfer_atk_acc": 21.788990825688074 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 69.01, + "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 61.97, + "atk_prompt": "Indicate if the connection between the followping senZences is 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 9.859154929577464, + "transfer_atk_acc": 9.859154929577464 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 57.75, + "atk_prompt": "In your role as an entailment analysis tool, asress the rlationship between the given sentences and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 23.943661971830984, + "transfer_atk_acc": 18.30985915492958 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 66.0, + "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.", + "atk_acc": 63.0, + "atk_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D' MQnugHcaoy. ", + "transfer_ori_acc": 8.865248226950355, + "transfer_atk_acc": 4.787234042553192 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 65.5, + "origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 63.0, + "atk_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D' mOJ9nEwT4f.", + "transfer_ori_acc": 8.51063829787234, + "transfer_atk_acc": 9.042553191489363 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_vicuna-13b/deepwordbug_0_shot.json b/results_transfer/chatgpt_vicuna-13b/deepwordbug_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..2c1ab261ce4493fbb69a0f7e68b0bd68214453dc --- /dev/null +++ b/results_transfer/chatgpt_vicuna-13b/deepwordbug_0_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 79.0, + "origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable':", + "atk_acc": 74.5, + "atk_prompt": "Review the sentece below and identify whether its grammar is 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 28.9, + "transfer_atk_acc": 42.0 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 79.5, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 0.0, + "atk_prompt": "nhile performing grammr analysis, classify the rgammar of the fllowing sentenZe as 'acceptable' for corFect Xrammar or 'unacceptable' for incorrect gramUar:", + "transfer_ori_acc": 27.0, + "transfer_atk_acc": 42.5 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 74.0, + "origin_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 70.0, + "atk_prompt": "Evaluate whether the two provRided sentnces convey the same meaning by answering 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 48.77450980392157, + "transfer_atk_acc": 29.411764705882355 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 76.0, + "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.", + "atk_acc": 53.0, + "atk_prompt": "nI your capacity as a sematnic asseJssment tol, evaluate the prvoided senRtences and classify their meainngs as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 48.03921568627451, + "transfer_atk_acc": 27.696078431372552 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 78.0, + "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 50.0, + "atk_prompt": "Exaluate whetxher the iven contet Supports the anwser to the queston by respEonding with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 26.5, + "transfer_atk_acc": 29.7 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 79.5, + "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.", + "atk_acc": 19.5, + "atk_prompt": "As a textula inference expert, aInalyze if the answer to the question can be deducud from the provided context and Melect 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 32.5, + "transfer_atk_acc": 32.0 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 82.0, + "origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 45.0, + "atk_prompt": "Assess whethdr the followng sLtatements are identocal in meuaning by answerinJ 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 30.2, + "transfer_atk_acc": 23.0 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 82.5, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 79.5, + "atk_prompt": "Ao an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions. ", + "transfer_ori_acc": 26.200000000000003, + "transfer_atk_acc": 12.2 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 83.5, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 79.0, + "atk_prompt": "ArYe the folowing two sentenes entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word. ", + "transfer_ori_acc": 27.075812274368232, + "transfer_atk_acc": 7.581227436823104 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 80.5, + "origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 78.5, + "atk_prompt": "IAs an instrument for entailment evaluatioRn, consider the otwo sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 24.90974729241877, + "transfer_atk_acc": 32.851985559566785 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 93.0, + "origin_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative':", + "atk_acc": 88.5, + "atk_prompt": "Deetrmine the overall sentLment of this sentence, categorizing it as 'positive' or 'negative': ", + "transfer_ori_acc": 14.3348623853211, + "transfer_atk_acc": 6.422018348623854 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 93.0, + "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:", + "atk_acc": 85.0, + "atk_prompt": "uFnctioning as a sentmient identification tool, assess if the folGowing expression is 'positive' or 'negative'. Classify: ", + "transfer_ori_acc": 38.646788990825684, + "transfer_atk_acc": 30.84862385321101 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 66.2, + "origin_prompt": "Examine the pair of sentences and determine if they exhibit entailment or not_entailment. Answer with either 'entailment' or 'not_entailment'.", + "atk_acc": 53.52, + "atk_prompt": "Examine the Bair of snetences and determine if they exhibit entailment or not_entailment. Ancwer with eMther 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 33.80281690140845, + "transfer_atk_acc": 25.352112676056336 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 63.38, + "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':", + "atk_acc": 53.52, + "atk_prompt": "IQn the role of a semantic relationship analyzer, eamine the connection between the given sentences and decide if they exhbit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 43.66197183098591, + "transfer_atk_acc": 43.66197183098591 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 62.5, + "origin_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D'.", + "atk_acc": 60.0, + "atk_prompt": "Regarding the following multiple-choice question on {}, pick the correct answeWr from the options 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 18.439716312056735, + "transfer_atk_acc": 22.5177304964539 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 62.5, + "origin_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 59.0, + "atk_prompt": "With your knowledge of {}, tackle the follQwing multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 22.872340425531913, + "transfer_atk_acc": 23.93617021276596 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_vicuna-13b/deepwordbug_3_shot.json b/results_transfer/chatgpt_vicuna-13b/deepwordbug_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..4e73222f8a672aac1050f63375018c35316d5c90 --- /dev/null +++ b/results_transfer/chatgpt_vicuna-13b/deepwordbug_3_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 80.5, + "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable'.", + "atk_acc": 78.5, + "atk_prompt": "Examine the given sentence and decide if it is gramatically sound. nAswer with either 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 0.8999999999999999, + "transfer_atk_acc": 0.4 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 81.5, + "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable'.", + "atk_acc": 78.5, + "atk_prompt": "Funtioning as a graMmar evaluation tool, analyzY the given sentence and decide if it is grammatically corLect, responding with 'acceptable' or 'unacceptable'. ", + "transfer_ori_acc": 23.9, + "transfer_atk_acc": 53.7 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 74.5, + "origin_prompt": "Examine the pair of sentences and decide if their meanings are identical by answering with 'equivalent' or 'not_equivalent'.", + "atk_acc": 65.5, + "atk_prompt": "Examine the Kair of sentences and edcide if their meanijngs are ibentical by answering with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 55.63725490196079, + "transfer_atk_acc": 68.87254901960785 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 72.0, + "origin_prompt": "As a linguistic comparator, review the following pair of sentences and determine their semantic equivalence by choosing 'equivalent' or 'not_equivalent'.", + "atk_acc": 66.5, + "atk_prompt": "AV a linguistic comaprator, review the following pair of sentences and determine their semayntic equivalence by chhoosing 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 61.51960784313726, + "transfer_atk_acc": 64.2156862745098 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 85.0, + "origin_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 77.5, + "atk_prompt": "Examine the contedt and question, and determine if the cXontext logically implies the answer by selecting 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 26.900000000000002, + "transfer_atk_acc": 34.300000000000004 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 86.0, + "origin_prompt": "As a textual analyst, examine if the given context logically implies the answer to the question and indicate your decision with 'entailment' or 'not_entailment'.", + "atk_acc": 75.0, + "atk_prompt": "A a textual analyst, xamine if the gPven conSext logiclaly implies the asnwer to the qbuestion and indicate your decision with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 31.4, + "transfer_atk_acc": 43.6 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 82.0, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 80.0, + "atk_prompt": "Can these two statements be considJred equal in meaning? Answer with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 36.3, + "transfer_atk_acc": 29.7 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 83.0, + "origin_prompt": "Acting as a question equivalence instrument, determine if the provided questions are equivalent in meaning, answering with 'equivalent' for similar questions or 'not_equivalent' for dissimilar ones.", + "atk_acc": 81.5, + "atk_prompt": "Actinm as a question equivalence instrumetn, determine if the provided questions are equivalent in meaning, answering with 'equivalent' for similar questions or 'not_equivalent' for dissimilar ones. ", + "transfer_ori_acc": 34.599999999999994, + "transfer_atk_acc": 30.5 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 85.0, + "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 80.5, + "atk_prompt": "Does the relationshBp between the given sentences repersent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 46.20938628158845, + "transfer_atk_acc": 45.84837545126354 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.5, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 81.0, + "atk_prompt": "In your role as an entailment analysis toIol, aZssess the relationship between the given sentences and cassify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 46.931407942238266, + "transfer_atk_acc": 40.794223826714806 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 97.0, + "origin_prompt": "Evaluate the sentiment of the given text and classify it as 'positive' or 'negative'.", + "atk_acc": 94.0, + "atk_prompt": "ENvaluate the senttment of the giOven text and classify it as 'positive' or 'negative'. ", + "transfer_ori_acc": 10.435779816513762, + "transfer_atk_acc": 2.408256880733945 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 97.0, + "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'.", + "atk_acc": 94.0, + "atk_prompt": "qAs a setniment classifier, determnne whether the following text is 'positive' or 'negative'. ", + "transfer_ori_acc": 15.36697247706422, + "transfer_atk_acc": 7.912844036697248 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 67.61, + "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 64.79, + "atk_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 9.859154929577464, + "transfer_atk_acc": 0.0 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 64.79, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 59.15, + "atk_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 23.943661971830984, + "transfer_atk_acc": 11.267605633802818 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 66.5, + "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 63.5, + "atk_prompt": "ISn ralation to the mulitple-choice question on {}, pleare provide the accurate answer by choosing 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 9.397163120567376, + "transfer_atk_acc": 5.141843971631205 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 65.5, + "origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 62.5, + "atk_prompt": "As someone well-versed in {}, please address the mulZtiple-choice question below by selecting 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 8.51063829787234, + "transfer_atk_acc": 11.702127659574469 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_vicuna-13b/stresstest_0_shot.json b/results_transfer/chatgpt_vicuna-13b/stresstest_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..9c668117376055f05724becc310e432219908f83 --- /dev/null +++ b/results_transfer/chatgpt_vicuna-13b/stresstest_0_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 78.5, + "origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable':", + "atk_acc": 32.5, + "atk_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable' and true is true :", + "transfer_ori_acc": 28.9, + "transfer_atk_acc": 42.0 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 79.5, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 75.0, + "atk_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar and true is true and true is true and true is true and true is true and true is true :", + "transfer_ori_acc": 27.0, + "transfer_atk_acc": 10.299999999999999 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 77.5, + "origin_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent'.", + "atk_acc": 73.0, + "atk_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent' and false is not true . ", + "transfer_ori_acc": 53.431372549019606, + "transfer_atk_acc": 52.450980392156865 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 76.0, + "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.", + "atk_acc": 15.5, + "atk_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent' and true is true . ", + "transfer_ori_acc": 48.03921568627451, + "transfer_atk_acc": 62.745098039215684 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 78.5, + "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 74.5, + "atk_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment' and true is true . ", + "transfer_ori_acc": 26.5, + "transfer_atk_acc": 37.0 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 79.5, + "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.", + "atk_acc": 69.0, + "atk_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 32.5, + "transfer_atk_acc": 29.599999999999998 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 80.0, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 72.5, + "atk_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent' and false is not true . ", + "transfer_ori_acc": 22.900000000000002, + "transfer_atk_acc": 28.599999999999998 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 81.0, + "origin_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 80.5, + "atk_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 14.6, + "transfer_atk_acc": 23.400000000000002 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 83.5, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 81.5, + "atk_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word and true is true . ", + "transfer_ori_acc": 27.075812274368232, + "transfer_atk_acc": 7.2202166064981945 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 81.5, + "origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 80.5, + "atk_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true :", + "transfer_ori_acc": 24.90974729241877, + "transfer_atk_acc": 37.18411552346571 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 91.0, + "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment:", + "atk_acc": 0.0, + "atk_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment and true is true : ", + "transfer_ori_acc": 42.54587155963303, + "transfer_atk_acc": 49.77064220183486 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 93.5, + "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:", + "atk_acc": 93.0, + "atk_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify and false is not true : ", + "transfer_ori_acc": 38.646788990825684, + "transfer_atk_acc": 36.353211009174316 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 61.97, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 63.38, + "atk_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word and true is true . ", + "transfer_ori_acc": 25.352112676056336, + "transfer_atk_acc": 4.225352112676056 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 63.38, + "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':", + "atk_acc": 59.15, + "atk_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true :", + "transfer_ori_acc": 43.66197183098591, + "transfer_atk_acc": 30.985915492957744 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 63.0, + "origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 53.5, + "atk_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D' and true is true . ", + "transfer_ori_acc": 22.69503546099291, + "transfer_atk_acc": 27.127659574468083 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 61.5, + "origin_prompt": "As a skilled person in the field of {}, please respond to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 57.0, + "atk_prompt": "As a skilled person in the field of {}, please respond to the multiple-choice question by choosing 'A', 'B', 'C', or 'D' and false is not true .", + "transfer_ori_acc": 20.0354609929078, + "transfer_atk_acc": 20.212765957446805 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_vicuna-13b/stresstest_3_shot.json b/results_transfer/chatgpt_vicuna-13b/stresstest_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..d3ed2adaef3ce304884385d17d6b3fbde2a6ec92 --- /dev/null +++ b/results_transfer/chatgpt_vicuna-13b/stresstest_3_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 80.5, + "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable'.", + "atk_acc": 79.5, + "atk_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 0.8999999999999999, + "transfer_atk_acc": 0.4 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 81.5, + "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable'.", + "atk_acc": 80.5, + "atk_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable' and false is not true . ", + "transfer_ori_acc": 23.9, + "transfer_atk_acc": 9.4 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 73.5, + "origin_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 71.5, + "atk_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 69.36274509803921, + "transfer_atk_acc": 69.11764705882352 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 72.5, + "origin_prompt": "In the role of a textual comparison specialist, examine the given sentences and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 70.0, + "atk_prompt": "In the role of a textual comparison specialist, examine the given sentences and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent' and true is true . ", + "transfer_ori_acc": 69.36274509803921, + "transfer_atk_acc": 63.48039215686274 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 85.0, + "origin_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 79.5, + "atk_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 26.900000000000002, + "transfer_atk_acc": 11.3 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 84.0, + "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", + "atk_acc": 81.5, + "atk_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment' and false is not true . ", + "transfer_ori_acc": 41.6, + "transfer_atk_acc": 31.7 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 81.5, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 81.0, + "atk_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent' and false is not true . ", + "transfer_ori_acc": 36.3, + "transfer_atk_acc": 41.699999999999996 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 83.0, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 82.0, + "atk_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 35.3, + "transfer_atk_acc": 40.2 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 85.0, + "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 82.0, + "atk_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment' and false is not true . ", + "transfer_ori_acc": 46.20938628158845, + "transfer_atk_acc": 48.014440433213 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 83.0, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 83.0, + "atk_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment' and false is not true . ", + "transfer_ori_acc": 46.20938628158845, + "transfer_atk_acc": 41.15523465703971 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 97.0, + "origin_prompt": "Evaluate the sentiment of the given text and classify it as 'positive' or 'negative'.", + "atk_acc": 95.5, + "atk_prompt": "Evaluate the sentiment of the given text and classify it as 'positive' or 'negative' and true is true . ", + "transfer_ori_acc": 10.435779816513762, + "transfer_atk_acc": 1.94954128440367 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 97.0, + "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'.", + "atk_acc": 96.0, + "atk_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative' and false is not true . ", + "transfer_ori_acc": 15.36697247706422, + "transfer_atk_acc": 8.486238532110093 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 67.61, + "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 60.56, + "atk_prompt": "Indicate if the connection between the later sentences is 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 9.859154929577464, + "transfer_atk_acc": 9.859154929577464 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 59.15, + "atk_prompt": "In the role of a semantic relationship analyzer, investigated the linking between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 5.633802816901409, + "transfer_atk_acc": 12.676056338028168 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 66.5, + "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.", + "atk_acc": 61.5, + "atk_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D' and false is not true . ", + "transfer_ori_acc": 8.865248226950355, + "transfer_atk_acc": 11.52482269503546 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 65.5, + "origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 59.5, + "atk_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D' and false is not true .", + "transfer_ori_acc": 8.51063829787234, + "transfer_atk_acc": 15.602836879432624 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_vicuna-13b/textbugger_0_shot.json b/results_transfer/chatgpt_vicuna-13b/textbugger_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..c02674e7fd14a2308b81a95b4d0e8b5ce2f9b84f --- /dev/null +++ b/results_transfer/chatgpt_vicuna-13b/textbugger_0_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 77.5, + "origin_prompt": "Assess the grammatical structure of the given sentence and classify it as 'Acceptable' or 'Unacceptable':", + "atk_acc": 6.5, + "atk_prompt": "As sess the garmmatical structure of the conferred sentence and classify it as 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 50.5, + "transfer_atk_acc": 49.5 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 79.5, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 9.5, + "atk_prompt": "While prforming grammar analyzing, classify the grammar of the follows sentеnce as 'acceptable' for correcting gammar or 'unacceptable' for incorrect ɡrammar:", + "transfer_ori_acc": 27.0, + "transfer_atk_acc": 32.2 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 75.0, + "origin_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 67.5, + "atk_prompt": "Evaluate wհether the both provided sentences convey the same meaninɡ by answeriոg 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 48.77450980392157, + "transfer_atk_acc": 26.47058823529412 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 75.0, + "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.", + "atk_acc": 9.0, + "atk_prompt": "In your capacities as a semɑntic assessment tool, evaluate the provdied penalties and classify their meaning as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 48.03921568627451, + "transfer_atk_acc": 35.53921568627451 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 77.0, + "origin_prompt": "Consider the context and question, and indicate if the answer can be logically deduced from the context by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 57.0, + "atk_prompt": "Considеr the context and question, and idicate if the respond can be logically ԁeduced from the context by ⲅesponding with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 32.9, + "transfer_atk_acc": 5.800000000000001 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 78.5, + "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.", + "atk_acc": 20.0, + "atk_prompt": "Because a textual inference expert, analysing if the answeⲅ to the question can be deduced from the providеd contex𝚝 and choosing 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 32.5, + "transfer_atk_acc": 2.9000000000000004 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 81.5, + "origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 44.5, + "atk_prompt": "Assess whether the following statements are analogous in maning by answeⲅing 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 30.2, + "transfer_atk_acc": 13.200000000000001 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 83.0, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 77.5, + "atk_prompt": "As an instrument for problem comparison evɑluation, consider the ques𝚝ions and determine if their meaning is the same, respond with 'equivalent' for similar questiоns or 'not_equivalent' for different questions. ", + "transfer_ori_acc": 26.200000000000003, + "transfer_atk_acc": 7.3 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 83.0, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 78.5, + "atk_prompt": "Be the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word. ", + "transfer_ori_acc": 27.075812274368232, + "transfer_atk_acc": 3.9711191335740073 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 81.0, + "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 74.5, + "atk_prompt": "Working as an entailment classifier, detecting if the given torque of sentences displays entailment or not_entailment. Answering with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 49.81949458483754, + "transfer_atk_acc": 25.63176895306859 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 90.0, + "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment:", + "atk_acc": 82.5, + "atk_prompt": "Review this speeches and decide if it has a 'positive' or 'negative' sentiment: ", + "transfer_ori_acc": 42.54587155963303, + "transfer_atk_acc": 27.75229357798165 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 93.5, + "origin_prompt": "Acting as a sentiment evaluator, identify if the given sentence is 'positive' or 'negative'. Classify:", + "atk_acc": 86.0, + "atk_prompt": "Atcing as a sentiemnt evalua𝚝or, detecting if the given sentence is 'positive' or 'negative'. Classify: ", + "transfer_ori_acc": 21.90366972477064, + "transfer_atk_acc": 26.949541284403672 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 67.61, + "origin_prompt": "Examine the pair of sentences and determine if they exhibit entailment or not_entailment. Answer with either 'entailment' or 'not_entailment'.", + "atk_acc": 52.11, + "atk_prompt": "Examine the paiⲅ of sentences and determines if they exhibit entailment or not_entailment. Answer with neither 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 33.80281690140845, + "transfer_atk_acc": 0.0 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 60.56, + "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 52.11, + "atk_prompt": "Working as an entailment classifier, detects whether the given pair of sentences displays entailment or not_entailment. Answering with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 43.66197183098591, + "transfer_atk_acc": 35.2112676056338 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 62.5, + "origin_prompt": "Regarding the following multiple-choice question on {}, pick the correct answer from the options 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.0, + "atk_prompt": "Regarding the hereafter multiple-choice question on {}, piϲk the corrects answers from the options 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 18.439716312056735, + "transfer_atk_acc": 26.95035460992908 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 62.5, + "origin_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 57.5, + "atk_prompt": "Witհ your knowledge of {}, confront the following multipⅼe-choice question by select 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 22.872340425531913, + "transfer_atk_acc": 27.482269503546096 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_vicuna-13b/textbugger_3_shot.json b/results_transfer/chatgpt_vicuna-13b/textbugger_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..0f315826f2d0418e75ccda9264412113a958511f --- /dev/null +++ b/results_transfer/chatgpt_vicuna-13b/textbugger_3_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 80.5, + "origin_prompt": "Analyze the provided sentence and classify its grammatical correctness as 'Acceptable' or 'Unacceptable'.", + "atk_acc": 67.5, + "atk_prompt": "Analysing the provided sentences and classify its pronouns correct as 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 10.9, + "transfer_atk_acc": 54.7 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 81.0, + "origin_prompt": "In the role of a syntax analyzer, examine the grammar of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones.", + "atk_acc": 77.5, + "atk_prompt": "In the role of a syntax checker, examine the grammɑr of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentence or 'unacceptable' for incorrect ones. ", + "transfer_ori_acc": 3.4000000000000004, + "transfer_atk_acc": 10.299999999999999 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 73.0, + "origin_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 68.0, + "atk_prompt": "Ass ess if the two gien sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 69.36274509803921, + "transfer_atk_acc": 70.34313725490196 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 71.5, + "origin_prompt": "As a linguistic comparator, review the following pair of sentences and determine their semantic equivalence by choosing 'equivalent' or 'not_equivalent'.", + "atk_acc": 64.5, + "atk_prompt": "As a linguistic comparator, review the following torque of sentences and determine their semantiϲ eԛuivalence by choosing 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 61.51960784313726, + "transfer_atk_acc": 67.15686274509804 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 85.0, + "origin_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 76.0, + "atk_prompt": "Examine the cntext and question, and determine if the backdrop logically implies the answer by selecting 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 26.900000000000002, + "transfer_atk_acc": 42.0 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 83.0, + "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", + "atk_acc": 73.5, + "atk_prompt": "Aѕ a semantic researcher, evaluate si the gave framework supports the respond to the question and choose 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 41.6, + "transfer_atk_acc": 40.1 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 82.5, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 80.0, + "atk_prompt": "Can these two st atements be considered equal in meaning? Ansԝer with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 36.3, + "transfer_atk_acc": 31.2 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 82.5, + "origin_prompt": "In the capacity of a question assessment system, indicate if the meaning of the provided questions is the same, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 80.5, + "atk_prompt": "In the abilities of a question ɑssessment system, indicate if the meaning of the provided matters is the same, responding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 46.9, + "transfer_atk_acc": 40.5 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 84.0, + "origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.", + "atk_acc": 76.5, + "atk_prompt": "Review the two declarations and categorize their relationship as neither 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 50.54151624548736, + "transfer_atk_acc": 45.84837545126354 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 83.5, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 80.0, + "atk_prompt": "As an entailment identification system, eamine the connection between the following sentences and reѕpond with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 46.20938628158845, + "transfer_atk_acc": 47.292418772563174 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 95.5, + "origin_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative'.", + "atk_acc": 74.0, + "atk_prompt": "Ater examining the following expression, etiquette its emotion as nor 'positive' or 'negative'. ", + "transfer_ori_acc": 7.912844036697248, + "transfer_atk_acc": 5.045871559633028 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 97.5, + "origin_prompt": "In the capacity of a sentiment classifier, decide whether the given quote is 'positive' or 'negative'.", + "atk_acc": 93.5, + "atk_prompt": "Onto the capacity of a feeling classifeir, decide si the given quote is 'positive' or 'negative'. ", + "transfer_ori_acc": 20.18348623853211, + "transfer_atk_acc": 0.8027522935779817 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 69.01, + "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 61.97, + "atk_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment' Vq3aBzuZcD. ", + "transfer_ori_acc": 9.859154929577464, + "transfer_atk_acc": 18.30985915492958 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 59.15, + "atk_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment' is77sOXAu8. ", + "transfer_ori_acc": 23.943661971830984, + "transfer_atk_acc": 8.450704225352112 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 67.5, + "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 61.5, + "atk_prompt": "At relationship to the mul𝚝iple-choice question on {}, please proviԁe the accurate responding by choosing 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 9.397163120567376, + "transfer_atk_acc": 4.432624113475177 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 65.5, + "origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 61.5, + "atk_prompt": "As someone well-vesred in {}, please address the muⅼtiple-choice questioո below by selecting 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 8.51063829787234, + "transfer_atk_acc": 10.815602836879433 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_vicuna-13b/textfooler_0_shot.json b/results_transfer/chatgpt_vicuna-13b/textfooler_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..dd4cfd112cd60b4a597e340480b3ae1106c6be3c --- /dev/null +++ b/results_transfer/chatgpt_vicuna-13b/textfooler_0_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 79.0, + "origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable':", + "atk_acc": 2.0, + "atk_prompt": "Revising the punishments below and locate whether its dictionary is 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 28.9, + "transfer_atk_acc": 33.4 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 80.0, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 6.0, + "atk_prompt": "While performing jargon discusses, categories the language of the below sentence as 'acceptable' for mend jargon or 'unacceptable' for unfit grammar:", + "transfer_ori_acc": 27.0, + "transfer_atk_acc": 39.1 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 75.0, + "origin_prompt": "Examine the pair of sentences and decide if their meanings are identical by answering with 'equivalent' or 'not_equivalent'.", + "atk_acc": 68.5, + "atk_prompt": "Examine the pair of sentences and selects if their regard are identical by responding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 64.2156862745098, + "transfer_atk_acc": 43.872549019607845 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 75.5, + "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.0, + "atk_prompt": "About your expertise as a semantic appraisal means, evaluate the provided penalty and classify their meanings as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 48.03921568627451, + "transfer_atk_acc": 22.54901960784314 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 77.5, + "origin_prompt": "Consider the context and question, and indicate if the answer can be logically deduced from the context by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 0.5, + "atk_prompt": "Recital the context and items, and specify if the solution can be logically subtracted from the framework by reactions with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 32.9, + "transfer_atk_acc": 4.0 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 78.0, + "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "As a semantic fellows, measuring whether the rendered background supports the answers to the item and chosen 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 13.4, + "transfer_atk_acc": 6.6000000000000005 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 80.5, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.0, + "atk_prompt": "Kan these two statement is studied same in sens? Reaction with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 22.900000000000002, + "transfer_atk_acc": 7.3999999999999995 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 80.5, + "origin_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 76.5, + "atk_prompt": "Functioning as a question similarity evaluation tool, analyse the given questions and decide if they share the same feel, responding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 14.6, + "transfer_atk_acc": 20.8 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 83.5, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 60.5, + "atk_prompt": "Pose the aftermath two phrases entailment or not_entailment? Reactions me with \"entailment\" or \"not_entailment\", just one adjectives. ", + "transfer_ori_acc": 27.075812274368232, + "transfer_atk_acc": 36.462093862815884 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 81.0, + "origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 60.0, + "atk_prompt": "Because an devices for entailment appraised, revisit the two phrases and identifying if their correlate is 'entailment' or 'not_entailment'. Answered with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 24.90974729241877, + "transfer_atk_acc": 9.747292418772563 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 90.5, + "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment:", + "atk_acc": 0.0, + "atk_prompt": "Analysis this assertion and defining whether it ai a 'positive' or 'negative' sentiment: ", + "transfer_ori_acc": 42.54587155963303, + "transfer_atk_acc": 35.091743119266056 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 94.0, + "origin_prompt": "Emulating a sentiment classification system, indicate whether the provided text is 'positive' or 'negative'. Classify:", + "atk_acc": 4.5, + "atk_prompt": "Emulating a foreboding classifies regime, explain whether the stipulated laws is 'positive' or 'negative'. Grading: ", + "transfer_ori_acc": 6.536697247706422, + "transfer_atk_acc": 35.77981651376147 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 69.01, + "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 52.11, + "atk_prompt": "Identify whether the given pair of sentence exposes entailment or not_entailment. Retaliated with 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 33.80281690140845, + "transfer_atk_acc": 42.25352112676056 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 67.61, + "origin_prompt": "Acting as an entailment detection instrument, determine if the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment':", + "atk_acc": 46.48, + "atk_prompt": "Acting as an entailment detection instrument, determine if the given torque of sentences depicting entailment or not_entailment. Reactions with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 43.66197183098591, + "transfer_atk_acc": 39.436619718309856 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 64.0, + "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 50.5, + "atk_prompt": "In relation to the multiple-choice issue on {}, please delivering the clarification rebuttal by selector 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 19.326241134751772, + "transfer_atk_acc": 20.390070921985814 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 61.5, + "origin_prompt": "As a skilled person in the field of {}, please respond to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 54.5, + "atk_prompt": "As a expert person in the minefields of {}, please reacts to the multiple-choice question by choice 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 20.0354609929078, + "transfer_atk_acc": 21.631205673758867 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_vicuna-13b/textfooler_3_shot.json b/results_transfer/chatgpt_vicuna-13b/textfooler_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..747d4154409dc58a34a5eb1abf6d20affd310dee --- /dev/null +++ b/results_transfer/chatgpt_vicuna-13b/textfooler_3_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 80.5, + "origin_prompt": "Examine the sentence and decide if its grammar is 'Acceptable' or 'Unacceptable'.", + "atk_acc": 39.5, + "atk_prompt": "Exams the sentence and defining if its jargon is 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 41.8, + "transfer_atk_acc": 57.8 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 81.0, + "origin_prompt": "In the role of a syntax analyzer, examine the grammar of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones.", + "atk_acc": 76.0, + "atk_prompt": "Across the role of a syntax analyzer, examine the dictionary of the provided chastisement and decide if it is adequate, answered with 'acceptable' for grammatically correct sentences or 'unacceptable' for deceptive ones. ", + "transfer_ori_acc": 3.4000000000000004, + "transfer_atk_acc": 11.600000000000001 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 73.0, + "origin_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 59.0, + "atk_prompt": "Appraisal if the two accorded sentences enjoys equivalent connotation by choice 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 69.36274509803921, + "transfer_atk_acc": 69.36274509803921 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 71.5, + "origin_prompt": "In the role of a textual comparison specialist, examine the given sentences and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 57.0, + "atk_prompt": "De the role of a textual comparisons scholar, revisited the given sentences and decide if they exchange the same feel, responding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 69.36274509803921, + "transfer_atk_acc": 55.14705882352941 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 84.5, + "origin_prompt": "Examine the context and question, and determine if the context logically implies the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 63.5, + "atk_prompt": "Discuss the background and question, and determine if the framework intuitively presupposes the answer by akhtar 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 26.900000000000002, + "transfer_atk_acc": 33.0 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 85.5, + "origin_prompt": "As a textual analyst, examine if the given context logically implies the answer to the question and indicate your decision with 'entailment' or 'not_entailment'.", + "atk_acc": 68.5, + "atk_prompt": "Iike a textual analyst, analyze if the submitted regard rationally implies the replies to the issuing and indicates your clout with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 31.4, + "transfer_atk_acc": 12.4 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 83.5, + "origin_prompt": "Determine if the given pair of statements can be considered the same by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 80.0, + "atk_prompt": "Determine if the given pair of representations can constitute felt the same by responding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 29.9, + "transfer_atk_acc": 32.9 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 83.0, + "origin_prompt": "Acting as a question equivalence instrument, determine if the provided questions are equivalent in meaning, answering with 'equivalent' for similar questions or 'not_equivalent' for dissimilar ones.", + "atk_acc": 80.0, + "atk_prompt": "Interim as a issues equivalence instrument, determine if the provided aspects are equivalent in meaning, answering with 'equivalent' for selfsame questions or 'not_equivalent' for dissimilar ones. ", + "transfer_ori_acc": 34.599999999999994, + "transfer_atk_acc": 32.5 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 84.5, + "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 79.5, + "atk_prompt": "Do the coitus between the given sentences comprised entailment or not_entailment? Meet with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 46.20938628158845, + "transfer_atk_acc": 44.4043321299639 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 83.0, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 79.5, + "atk_prompt": "As an entailment identification system, revisited the connection between the following sentences and respondent with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 46.20938628158845, + "transfer_atk_acc": 44.765342960288805 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.0, + "origin_prompt": "Read the provided excerpt and choose between 'positive' and 'negative' to describe its sentiment.", + "atk_acc": 85.0, + "atk_prompt": "Lu the rendered excerpt and withheld between 'positive' and 'negative' to describe its foreboding. ", + "transfer_ori_acc": 1.261467889908257, + "transfer_atk_acc": 3.4403669724770642 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 97.5, + "origin_prompt": "Emulating a sentiment classification system, indicate whether the provided text is 'positive' or 'negative'.", + "atk_acc": 91.5, + "atk_prompt": "Emulating a ambience classification mechanisms, describe whether the presented writings is 'positive' or 'negative'. ", + "transfer_ori_acc": 22.93577981651376, + "transfer_atk_acc": 3.211009174311927 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 69.01, + "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 59.15, + "atk_prompt": "Indicate if the connection between the subsequently sentences is 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 9.859154929577464, + "transfer_atk_acc": 9.859154929577464 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 57.75, + "atk_prompt": "In the role of a semantic contacts analyzer, analysed the connecting between the given sentences and choose if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 5.633802816901409, + "transfer_atk_acc": 14.084507042253522 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 67.0, + "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.", + "atk_acc": 60.0, + "atk_prompt": "Asking reactions to the multiple-choice question about {} by selecting the appropriate reacts: 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 8.865248226950355, + "transfer_atk_acc": 16.134751773049647 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 66.0, + "origin_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 61.5, + "atk_prompt": "From your acquaintances of {}, addressing the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 8.865248226950355, + "transfer_atk_acc": 10.28368794326241 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_vicuna-13b/translation_0_shot.json b/results_transfer/chatgpt_vicuna-13b/translation_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..cde8f5537241471d5ca0102c28941ebaea0150e7 --- /dev/null +++ b/results_transfer/chatgpt_vicuna-13b/translation_0_shot.json @@ -0,0 +1,82 @@ +[ + { + "dataset": "cola", + "type": "translation", + "origin_acc": 79.5, + "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable':", + "atk_acc": 25.0, + "atk_prompt": "Classify the grammar of a given sentence into two categories, 'acceptable' or 'unacceptable', and judge its accuracy.", + "transfer_ori_acc": 57.099999999999994, + "transfer_atk_acc": 31.5 + }, + { + "dataset": "mrpc", + "type": "translation", + "origin_acc": 76.5, + "origin_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent'.", + "atk_acc": 7.0, + "atk_prompt": "As an expert in semantic comparison, assess the similarity between the following two sentences and determine whether they are 'equivalent' or 'not_equivalent'.", + "transfer_ori_acc": 53.431372549019606, + "transfer_atk_acc": 26.225490196078432 + }, + { + "dataset": "qnli", + "type": "translation", + "origin_acc": 79.5, + "origin_prompt": "Assess if the answer to the question can be logically concluded from the provided context by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 56.5, + "atk_prompt": "Check the text link to the question and answer 'entailment' or 'not_entailment', depending on your language skills.", + "transfer_ori_acc": 19.7, + "transfer_atk_acc": 5.2 + }, + { + "dataset": "qqp", + "type": "translation", + "origin_acc": 84.0, + "origin_prompt": "Determine if the given pair of statements can be considered the same by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 3.0, + "atk_prompt": "As a tool for determining the equivalence of problems, the problems are examined and their similarity is classified as 'equivalent' or 'not_equivalent'.", + "transfer_ori_acc": 13.5, + "transfer_atk_acc": 2.3 + }, + { + "dataset": "rte", + "type": "translation", + "origin_acc": 84.0, + "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 39.0, + "atk_prompt": "Determine whether the given sentences have a cause-effect relationship as an engagement analysis tool and categorize them as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 41.15523465703971, + "transfer_atk_acc": 22.382671480144403 + }, + { + "dataset": "sst2", + "type": "translation", + "origin_acc": 96.0, + "origin_prompt": "Read the provided excerpt and choose between 'positive' and 'negative' to describe its sentiment:", + "atk_acc": 29.5, + "atk_prompt": "In the context of textual analysis, what classification is this phrase between 'positive' and 'negative'?", + "transfer_ori_acc": 27.981651376146786, + "transfer_atk_acc": 44.8394495412844 + }, + { + "dataset": "wnli", + "type": "translation", + "origin_acc": 67.61, + "origin_prompt": "Examine the pair of sentences and determine if they exhibit entailment or not_entailment. Answer with either 'entailment' or 'not_entailment'.", + "atk_acc": 26.76, + "atk_prompt": "Using implication analysis, determine whether the given sentences have a cause-effect relationship and categorize them as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 33.80281690140845, + "transfer_atk_acc": 36.61971830985916 + }, + { + "dataset": "mmlu", + "type": "translation", + "origin_acc": 64.0, + "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 49.5, + "atk_prompt": "Choose the correct answer to the multiple-choice question below. Is '{}' an 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 19.326241134751772, + "transfer_atk_acc": 19.680851063829788 + } +] \ No newline at end of file diff --git a/results_transfer/chatgpt_vicuna-13b/translation_3_shot.json b/results_transfer/chatgpt_vicuna-13b/translation_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..1f26035db2e4822d888d777490c0a3021fed2d01 --- /dev/null +++ b/results_transfer/chatgpt_vicuna-13b/translation_3_shot.json @@ -0,0 +1,82 @@ +[ + { + "dataset": "cola", + "type": "translation", + "origin_acc": 81.5, + "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable'.", + "atk_acc": 68.5, + "atk_prompt": "Classify the grammar of a given sentence as 'acceptable' or 'unacceptable', and judge its accuracy.", + "transfer_ori_acc": 23.9, + "transfer_atk_acc": 28.7 + }, + { + "dataset": "mrpc", + "type": "translation", + "origin_acc": 74.5, + "origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 65.0, + "atk_prompt": "Based on my experience in semantic analysis, classify the following two sentences between 'equivalent' or 'not_equivalent'.", + "transfer_ori_acc": 62.254901960784316, + "transfer_atk_acc": 60.5392156862745 + }, + { + "dataset": "qnli", + "type": "translation", + "origin_acc": 86.0, + "origin_prompt": "As a textual analyst, examine if the given context logically implies the answer to the question and indicate your decision with 'entailment' or 'not_entailment'.", + "atk_acc": 71.5, + "atk_prompt": "Check the text link to the question and answer 'entailment' or 'not_entailment', depending on your language skills.", + "transfer_ori_acc": 31.4, + "transfer_atk_acc": 2.0 + }, + { + "dataset": "qqp", + "type": "translation", + "origin_acc": 84.0, + "origin_prompt": "In the capacity of a question assessment system, indicate if the meaning of the provided questions is the same, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 80.5, + "atk_prompt": "By analyzing the similarity of these problems, they are divided into categories of 'equivalent' or 'not_equivalent'.", + "transfer_ori_acc": 46.9, + "transfer_atk_acc": 37.6 + }, + { + "dataset": "rte", + "type": "translation", + "origin_acc": 84.5, + "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 80.0, + "atk_prompt": "Determine whether the given sentences have a cause-effect relationship as an engagement analysis tool and categorize them as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 46.20938628158845, + "transfer_atk_acc": 23.465703971119133 + }, + { + "dataset": "sst2", + "type": "translation", + "origin_acc": 97.5, + "origin_prompt": "In the capacity of a sentiment classifier, decide whether the given quote is 'positive' or 'negative'.", + "atk_acc": 68.5, + "atk_prompt": "In the context of textual analysis, what classification is this phrase between 'positive' and 'negative'?", + "transfer_ori_acc": 20.18348623853211, + "transfer_atk_acc": 32.798165137614674 + }, + { + "dataset": "wnli", + "type": "translation", + "origin_acc": 74.65, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 59.15, + "atk_prompt": "Determinate the semantic connections of a given sentence and classify it as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 33.80281690140845, + "transfer_atk_acc": 4.225352112676056 + }, + { + "dataset": "mmlu", + "type": "translation", + "origin_acc": 67.0, + "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.", + "atk_acc": 63.0, + "atk_prompt": "Rate your understanding of the multiple-choice question on {}. Choose the correct answer from options 'A', 'B', 'C' or 'D'.", + "transfer_ori_acc": 8.865248226950355, + "transfer_atk_acc": 8.156028368794328 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_chatgpt/bertattack_0_shot.json b/results_transfer/google-flan-t5-large_chatgpt/bertattack_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..175ea53d1eb5d01fc882d47afd1f847112d5767e --- /dev/null +++ b/results_transfer/google-flan-t5-large_chatgpt/bertattack_0_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 77.9, + "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable':", + "atk_acc": 30.9, + "atk_prompt": "examine the english plural and say if it is grammaticalmost sound. Answer with neither 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 77.0, + "transfer_atk_acc": 36.0 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 76.3, + "origin_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 30.9, + "atk_prompt": "for an organ for linguistic evaluation, write the movie and question if its grammar is incorrect, and with 'acceptable' for new grammar or 'unacceptable' for unstable grammar:", + "transfer_ori_acc": 79.0, + "transfer_atk_acc": 0.0 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 82.11, + "origin_prompt": "Do these two sentences have the same underlying meaning? Respond with 'equivalent' or 'not_equivalent'.", + "atk_acc": 16.67, + "atk_prompt": "do these two clauses have the same underlying meanings? deal with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 73.5, + "transfer_atk_acc": 69.5 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 82.6, + "origin_prompt": "In your capacity as a semantic comparison tool, examine the given sentences and decide if their meanings are identical, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 9.07, + "atk_prompt": "using your capacity as a semantic relations member, determine the given words and decide if their meanings are consecutive, comparison with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 75.0, + "transfer_atk_acc": 4.5 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 88.5, + "origin_prompt": "Based on the provided context and question, decide if the information supports the answer by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 27.6, + "atk_prompt": "following on the supplied context and questioning, save if the information states the response by simply with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 78.0, + "transfer_atk_acc": 36.0 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 89.2, + "origin_prompt": "As a textual analyst, examine if the given context logically implies the answer to the question and indicate your decision with 'entailment' or 'not_entailment'.", + "atk_acc": 11.8, + "atk_prompt": "as a textual analyst, scrutiny if the following means logical) implies the reply to the answer and cross your r with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 65.5, + "transfer_atk_acc": 9.0 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.1, + "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", + "atk_acc": 80.2, + "atk_prompt": "to the next enough being equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\". ", + "transfer_ori_acc": 81.0, + "transfer_atk_acc": 74.5 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 87.7, + "origin_prompt": "Acting as a question equivalence instrument, determine if the provided questions are equivalent in meaning, answering with 'equivalent' for similar questions or 'not_equivalent' for dissimilar ones.", + "atk_acc": 72.2, + "atk_prompt": "t as a question equal instrument, determine if the program questions are equivalent in semantic, asking with 'equivalent' for analogous question or 'not_equivalent' for nonssimicomplex times. ", + "transfer_ori_acc": 80.5, + "transfer_atk_acc": 81.5 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 86.64, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 81.59, + "atk_prompt": "be the following deux verbs entailment or not_entailment? asking me with \"entailment\" or \"not_entailment\", just one word. ", + "transfer_ori_acc": 81.0, + "transfer_atk_acc": 76.5 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.84, + "origin_prompt": "While performing entailment analysis, classify the relationship between the provided sentences as 'entailment' or 'not_entailment':", + "atk_acc": 78.7, + "atk_prompt": "While performing entailment analysis, construct the relationship between the and sentence as 'entailment' or 'not_entailment':", + "transfer_ori_acc": 78.0, + "transfer_atk_acc": 75.5 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 94.95, + "origin_prompt": "Given the context of this text, indicate if the emotion conveyed is 'positive' or 'negative':", + "atk_acc": 92.89, + "atk_prompt": "give the context of this context, and if the emotion recorded is 'positive' or 'negative': ", + "transfer_ori_acc": 86.5, + "transfer_atk_acc": 0.0 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 95.41, + "origin_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement:", + "atk_acc": 89.79, + "atk_prompt": "how the role of a compliment analysis tool, responses with 'positive' or 'negative' to mood this statement: ", + "transfer_ori_acc": 92.5, + "transfer_atk_acc": 90.0 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 66.2, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 54.93, + "atk_prompt": "are the following analogous sentence entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one verb. ", + "transfer_ori_acc": 64.7887323943662, + "transfer_atk_acc": 63.38028169014085 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 64.79, + "origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment':", + "atk_acc": 56.34, + "atk_prompt": "h the capacity of an entailment assessment system, indicate if the link between the following verb is 'entailment' or 'not_entailment':", + "transfer_ori_acc": 57.74647887323944, + "transfer_atk_acc": 49.29577464788733 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 45.57, + "origin_prompt": "For the multiple-choice question related to {}, please choose the most accurate answer from 'A', 'B', 'C', or 'D'.", + "atk_acc": 16.84, + "atk_prompt": "For the multiple-choice choice similar to {}, please find the most inaccurate questions from 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 63.0, + "transfer_atk_acc": 10.0 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 44.86, + "origin_prompt": "Utilizing your expertise in {}, answer the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 3.19, + "atk_prompt": "Utilizing your expertise in {}, write the following multiple-choices question by each 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 60.0, + "transfer_atk_acc": 59.5 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 87.82, + "origin_prompt": "Search the context for the most relevant answer. If the answer cannot be found, respond with 'unanswerable'.", + "atk_acc": 36.51, + "atk_prompt": "return the topic for the most mentioned question. then the answer cannot be found, respond with 'unanswerable'.", + "transfer_ori_acc": 61.62386143989461, + "transfer_atk_acc": 63.478546402546655 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 88.05, + "origin_prompt": "As an authority on the context, locate the most accurate answer. If the context doesn't contain the answer, mention 'unanswerable'.", + "atk_acc": 42.92, + "atk_prompt": "in an authority on the context, discover the most incorrect explanation. ensure the person doesn't contains the answer, mention 'unanswerable'.", + "transfer_ori_acc": 57.818138335095256, + "transfer_atk_acc": 35.36431912991652 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.14, + "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}:", + "atk_acc": 0.13, + "atk_prompt": "Please perform a translation of the given citation, converting it from {} to {}:", + "transfer_ori_acc": 0.21840008360286153, + "transfer_atk_acc": 0.2179261989361023 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.14, + "origin_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to {}:", + "atk_acc": 0.11, + "atk_prompt": "i the capacity of a possessed translator, may modification the following sentence from {} to {}: ", + "transfer_ori_acc": 0.22335240384171567, + "transfer_atk_acc": 0.21951448904038018 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.27, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}:", + "atk_acc": 0.21, + "atk_prompt": "refer the following sentence from its original {} language to the targets i {}:", + "transfer_ori_acc": 0.32648562280284094, + "transfer_atk_acc": 0.32318435707787674 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.27, + "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}:", + "atk_acc": 0.17, + "atk_prompt": "while a reflex translator, reverse the given sentence from its original {} language to the target languages {}: ", + "transfer_ori_acc": 0.3295374598940522, + "transfer_atk_acc": 0.3374240386421357 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 14.37, + "origin_prompt": "Obtain the result for this math question regarding {}:", + "atk_acc": 2.5, + "atk_prompt": "Obtain the procedure for this math question regarding {}:", + "transfer_ori_acc": 6.875000000000001, + "transfer_atk_acc": 0.625 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 13.12, + "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}:", + "atk_acc": 1.88, + "atk_prompt": "As a mathematics instructor, calculate the sum to the following problem related to {}: ", + "transfer_ori_acc": 5.0, + "transfer_atk_acc": 2.5 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_chatgpt/bertattack_3_shot.json b/results_transfer/google-flan-t5-large_chatgpt/bertattack_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..2c4740cb711cdc6a60f9f138c80acf5ca8131b9a --- /dev/null +++ b/results_transfer/google-flan-t5-large_chatgpt/bertattack_3_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 78.1, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable'.", + "atk_acc": 69.9, + "atk_prompt": "Assess the following conversation and pick if it is grammatical: correct. interact with 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 80.0, + "transfer_atk_acc": 80.5 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 75.8, + "origin_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.", + "atk_acc": 69.2, + "atk_prompt": "As an item for grammar acquisition, determine the conversation and determine if its grammar is okay, responding with 'acceptable' for normal grammar or 'unacceptable' for using spelling. ", + "transfer_ori_acc": 81.5, + "transfer_atk_acc": 82.0 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 82.6, + "origin_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent'.", + "atk_acc": 73.28, + "atk_prompt": "can the given words be considered semantically adjacent? Please respond with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 72.0, + "transfer_atk_acc": 72.5 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 81.37, + "origin_prompt": "As a semantic comparison expert, evaluate the given pair of sentences and determine if they are 'equivalent' or 'not_equivalent'.", + "atk_acc": 68.87, + "atk_prompt": "As a grammatical compares professionals, find the given language of sentence and determine if they are 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 70.0, + "transfer_atk_acc": 66.0 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 89.3, + "origin_prompt": "Based on the provided context and question, decide if the information supports the answer by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 45.5, + "atk_prompt": "from on the supplied context and questions, save if the information supports the answer by simply with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 82.0, + "transfer_atk_acc": 81.0 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 88.5, + "origin_prompt": "In your role as a semantic evaluator, determine if the provided context justifies the answer to the question and answer with 'entailment' or 'not_entailment'.", + "atk_acc": 40.6, + "atk_prompt": "when your role as a semantic evaluator, be if the reference subject justmatches the answer to the answer and also with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 81.5, + "transfer_atk_acc": 76.5 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.1, + "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", + "atk_acc": 82.5, + "atk_prompt": "Are the so a equations equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\". ", + "transfer_ori_acc": 83.0, + "transfer_atk_acc": 82.5 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 86.5, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 63.9, + "atk_prompt": "to an instrument for question ranking evaluation, consider the words and pick if their mood is the same, returning with 'equivalent' for analogous question or 'not_equivalent' for known questions. ", + "transfer_ori_acc": 83.0, + "transfer_atk_acc": 81.5 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 85.2, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 80.87, + "atk_prompt": "can the is compare sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word. ", + "transfer_ori_acc": 83.0, + "transfer_atk_acc": 83.0 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.12, + "origin_prompt": "Acting as an entailment detection instrument, determine if the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 78.7, + "atk_prompt": "Acting as an entailment detection instrument, be if the given language of sentences does entailment or not_entailment. start with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 83.5, + "transfer_atk_acc": 84.0 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 95.3, + "origin_prompt": "Considering the given phrase, would you say it carries a 'positive' or 'negative' connotation?", + "atk_acc": 91.51, + "atk_prompt": "given the verbal conjunction, who you say it implies a 'positive' or 'negative' connotation? ", + "transfer_ori_acc": 85.5, + "transfer_atk_acc": 91.0 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 95.41, + "origin_prompt": "Acting as a sentiment evaluator, identify if the given sentence is 'positive' or 'negative'.", + "atk_acc": 92.43, + "atk_prompt": "Acting as a positive evaluator, choose if the positive sentence is 'positive' or 'negative'. ", + "transfer_ori_acc": 96.0, + "transfer_atk_acc": 97.0 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 64.79, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 54.93, + "atk_prompt": "Assess the connecting between the relevant sentences and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 66.19718309859155, + "transfer_atk_acc": 57.74647887323944 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "Functioning as an entailment evaluation tool, analyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment'.", + "atk_acc": 49.3, + "atk_prompt": "Functioning as an entailment evaluation tool, analyze the generated sentences and place if their conversation is 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 66.19718309859155, + "transfer_atk_acc": 66.19718309859155 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 46.45, + "origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 16.84, + "atk_prompt": "For each multiple-. question about {}, reveal the incorrect ask by selecting 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 66.0, + "transfer_atk_acc": 25.0 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 46.99, + "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 43.44, + "atk_prompt": "Considering your familiarity with {}, attend to the following multiple-of question by options 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 66.0, + "transfer_atk_acc": 63.0 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 87.91, + "origin_prompt": "From the context, provide the most precise answer. If the answer is not in the context, respond with 'unanswerable'.", + "atk_acc": 73.79, + "atk_prompt": "about the context, provide the most vague explanation. given the answered is not in the sentence, respond with 'unanswerable'.", + "transfer_ori_acc": 69.28878149477491, + "transfer_atk_acc": 74.23346485915137 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 87.05, + "origin_prompt": "As a subject matter expert, extract the correct answer from the context. If an answer is not present, indicate 'unanswerable'.", + "atk_acc": 43.0, + "atk_prompt": "provide a question ask specialist, extract the incorrect field from the context. If an answer is not present, indicate 'unanswerable'.", + "transfer_ori_acc": 72.59620212188865, + "transfer_atk_acc": 71.34409116095414 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.22, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.", + "atk_acc": 0.21, + "atk_prompt": "replace the this verb from its originally {} language to the target language {}. ", + "transfer_ori_acc": 0.23822015459148888, + "transfer_atk_acc": 0.2403233731592474 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.22, + "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}.", + "atk_acc": 0.21, + "atk_prompt": "form a qualified translator, manipulate the given example from its original {} languages to the target language {}. ", + "transfer_ori_acc": 0.23739541096852568, + "transfer_atk_acc": 0.24074409492166066 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.31, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.", + "atk_acc": 0.3, + "atk_prompt": "translate the second sentence from its original {} language to the target language {}. ", + "transfer_ori_acc": 0.34683343580119863, + "transfer_atk_acc": 0.3530788033705874 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.31, + "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}.", + "atk_acc": 0.3, + "atk_prompt": "As a trained translator, convert the given case from its original {} language to the project languages {}. ", + "transfer_ori_acc": 0.35108303624453957, + "transfer_atk_acc": 0.3516552246010325 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 17.5, + "origin_prompt": "Find the solution for this mathematical challenge with {}.", + "atk_acc": 15.0, + "atk_prompt": "Find the conclusion for this mathematical challenge with {}. ", + "transfer_ori_acc": 23.125, + "transfer_atk_acc": 20.0 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 18.12, + "origin_prompt": "Embodying a mathematics advisor, find the answer to the following math problem with {}.", + "atk_acc": 9.38, + "atk_prompt": "Embodying a mathematics advisor, find the subject to the such math item with {}. ", + "transfer_ori_acc": 23.125, + "transfer_atk_acc": 25.624999999999996 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_chatgpt/checklist_0_shot.json b/results_transfer/google-flan-t5-large_chatgpt/checklist_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..c2f267dde0a956dc075e4d568ebb937a11cf9b2b --- /dev/null +++ b/results_transfer/google-flan-t5-large_chatgpt/checklist_0_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 77.9, + "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable':", + "atk_acc": 62.6, + "atk_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable' IhuBIhoPGc:", + "transfer_ori_acc": 77.0, + "transfer_atk_acc": 75.0 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 76.3, + "origin_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 74.9, + "atk_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar IhuBIhoPGc:", + "transfer_ori_acc": 78.5, + "transfer_atk_acc": 72.0 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 82.11, + "origin_prompt": "Do these two sentences have the same underlying meaning? Respond with 'equivalent' or 'not_equivalent'.", + "atk_acc": 79.17, + "atk_prompt": "Do these two sentences have the same underlying meaning? Respond with 'equivalent' or 'not_equivalent' MSAddJ4D2a. ", + "transfer_ori_acc": 73.5, + "transfer_atk_acc": 75.0 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 82.6, + "origin_prompt": "As a semantic comparison expert, evaluate the given pair of sentences and determine if they are 'equivalent' or 'not_equivalent'.", + "atk_acc": 80.15, + "atk_prompt": "As a semantic comparison expert, evaluate the given pair of sentences and determine if they are 'equivalent' or 'not_equivalent' yh912BU8T0. ", + "transfer_ori_acc": 72.5, + "transfer_atk_acc": 74.0 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 88.5, + "origin_prompt": "Based on the provided context and question, decide if the information supports the answer by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 87.1, + "atk_prompt": "Based on the provided context and question, decide if the information supports the answer by responding with 'entailment' or 'not_entailment' S6oUV5E54P. ", + "transfer_ori_acc": 78.0, + "transfer_atk_acc": 75.5 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 89.4, + "origin_prompt": "As a semantic interpreter, assess whether the provided context supports the answer to the given question and answer with 'entailment' or 'not_entailment'.", + "atk_acc": 88.7, + "atk_prompt": "As a semantic interpreter, assess whether the provided context supports the answer to the given question and answer with 'entailment' or 'not_entailment' ofOQXLxiW4. ", + "transfer_ori_acc": 67.5, + "transfer_atk_acc": 72.5 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 87.7, + "origin_prompt": "Determine if the given pair of statements can be considered the same by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 86.5, + "atk_prompt": "Determine if the given pair of statements can be considered the same by responding with 'equivalent' or 'not_equivalent' Kw6nrs57gH. ", + "transfer_ori_acc": 81.5, + "transfer_atk_acc": 81.5 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 86.9, + "origin_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 86.6, + "atk_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent' Kw6nrs57gH. ", + "transfer_ori_acc": 81.5, + "transfer_atk_acc": 82.0 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 86.64, + "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 83.39, + "atk_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment' vTAjHynoIG.", + "transfer_ori_acc": 76.5, + "transfer_atk_acc": 77.5 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.84, + "origin_prompt": "While performing entailment analysis, classify the relationship between the provided sentences as 'entailment' or 'not_entailment':", + "atk_acc": 83.75, + "atk_prompt": "While performing entailment analysis, classify the relationship between the provided sentences as 'entailment' or 'not_entailment' CuVpbbkC4c:", + "transfer_ori_acc": 79.0, + "transfer_atk_acc": 80.5 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 94.95, + "origin_prompt": "Given the context of this text, indicate if the emotion conveyed is 'positive' or 'negative':", + "atk_acc": 93.58, + "atk_prompt": "Given the context of this text, indicate if the emotion conveyed is 'positive' or 'negative' LkF0FZxMZ4: ", + "transfer_ori_acc": 86.5, + "transfer_atk_acc": 87.0 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 95.3, + "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:", + "atk_acc": 94.15, + "atk_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify ItGDrrA1Sc: ", + "transfer_ori_acc": 92.0, + "transfer_atk_acc": 93.0 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 64.79, + "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 61.97, + "atk_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment' Kw6nrs57gH.", + "transfer_ori_acc": 52.112676056338024, + "transfer_atk_acc": 57.74647887323944 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment':", + "atk_acc": 61.97, + "atk_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment' SFPCqpiClT:", + "transfer_ori_acc": 50.70422535211267, + "transfer_atk_acc": 53.52112676056338 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 45.04, + "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.", + "atk_acc": 43.97, + "atk_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D' ItGDrrA1Sc. ", + "transfer_ori_acc": 62.0, + "transfer_atk_acc": 60.5 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 44.86, + "origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 43.44, + "atk_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D' CuVpbbkC4c.", + "transfer_ori_acc": 59.5, + "transfer_atk_acc": 61.0 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 87.65, + "origin_prompt": "Please derive the most fitting answer from the context. If there isn't an answer in the context, respond with 'unanswerable'.", + "atk_acc": 86.82, + "atk_prompt": "Please derive the most fitting answer from the context. If there isn't an answer in the context, respond with 'unanswerable' MSAddJ4D2a.", + "transfer_ori_acc": 69.02399528817593, + "transfer_atk_acc": 69.50796630082341 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 88.05, + "origin_prompt": "As an authority on the context, locate the most accurate answer. If the context doesn't contain the answer, mention 'unanswerable'.", + "atk_acc": 87.15, + "atk_prompt": "As an authority on the context, locate the most accurate answer. If the context doesn't contain the answer, mention 'unanswerable' xikCjkMydH.", + "transfer_ori_acc": 58.75591830538296, + "transfer_atk_acc": 61.29287775116594 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.14, + "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}:", + "atk_acc": 0.1, + "atk_prompt": "Please perform a translation of the given sentence, converting it from {} to mzjVogkRhM {}:", + "transfer_ori_acc": 0.21840008360286153, + "transfer_atk_acc": 0.22514742934128096 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.14, + "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}:", + "atk_acc": 0.08, + "atk_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to zexKUTWJYG {}: ", + "transfer_ori_acc": 0.22600539164221617, + "transfer_atk_acc": 0.22448676631201958 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.27, + "origin_prompt": "Accurately translate the sentence from {} to {}, ensuring the meaning remains intact:", + "atk_acc": 0.21, + "atk_prompt": "Accurately translate the sentence from {} to {}, ensuring the meaning remains intact EAB4KP2NVY:", + "transfer_ori_acc": 0.3279360242319025, + "transfer_atk_acc": 0.3291687239207952 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.27, + "origin_prompt": "Assuming the role of an expert language translator, translate the given text from {} to {}:", + "atk_acc": 0.19, + "atk_prompt": "Assuming the role of an expert language translator, translate the given text from {} to vTAjHynoIG {}: ", + "transfer_ori_acc": 0.3317016758714199, + "transfer_atk_acc": 0.3316999414372619 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 16.25, + "origin_prompt": "Evaluate the following mathematical problem that includes {}:", + "atk_acc": 10.62, + "atk_prompt": "Evaluate the following mathematical problem that includes is77sOXAu8 {}:", + "transfer_ori_acc": 4.375, + "transfer_atk_acc": 4.375 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 14.37, + "origin_prompt": "As a math guru, evaluate the following mathematical task regarding {}:", + "atk_acc": 10.0, + "atk_prompt": "As a math guru, evaluate the following mathematical task regarding KjPJJ2a7RB {}: ", + "transfer_ori_acc": 4.375, + "transfer_atk_acc": 2.5 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_chatgpt/checklist_3_shot.json b/results_transfer/google-flan-t5-large_chatgpt/checklist_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..f6f8b52ad0ba3b3522b57973c226744a4d4a8c79 --- /dev/null +++ b/results_transfer/google-flan-t5-large_chatgpt/checklist_3_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 78.1, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable'.", + "atk_acc": 77.0, + "atk_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable' zexKUTWJYG. ", + "transfer_ori_acc": 80.0, + "transfer_atk_acc": 79.5 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 75.7, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.", + "atk_acc": 74.6, + "atk_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar zexKUTWJYG. ", + "transfer_ori_acc": 82.0, + "transfer_atk_acc": 81.0 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 82.84, + "origin_prompt": "Please analyze the provided sentences and indicate if their meanings are the same by choosing 'equivalent' or 'not_equivalent'.", + "atk_acc": 80.88, + "atk_prompt": "Please analyze the provided sentences and indicate if their meanings are the same by choosing 'equivalent' or 'not_equivalent' CuVpbbkC4c. ", + "transfer_ori_acc": 71.0, + "transfer_atk_acc": 70.5 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 80.15, + "origin_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 79.66, + "atk_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent' BWmsLx8xOA. ", + "transfer_ori_acc": 71.5, + "transfer_atk_acc": 70.5 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 87.8, + "origin_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 86.4, + "atk_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment' mOJ9nEwT4f. ", + "transfer_ori_acc": 82.0, + "transfer_atk_acc": 80.5 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 88.5, + "origin_prompt": "In your role as a semantic evaluator, determine if the provided context justifies the answer to the question and answer with 'entailment' or 'not_entailment'.", + "atk_acc": 87.7, + "atk_prompt": "In your role as a semantic evaluator, determine if the provided context justifies the answer to the question and answer with 'entailment' or 'not_entailment' MSAddJ4D2a. ", + "transfer_ori_acc": 81.5, + "transfer_atk_acc": 83.0 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 87.6, + "origin_prompt": "Are the meanings of these two phrases the same? Reply with 'equivalent' or 'not_equivalent'.", + "atk_acc": 87.4, + "atk_prompt": "Are the meanings of these two phrases the same? Reply with 'equivalent' or 'not_equivalent' Pbl3tYuoRo. ", + "transfer_ori_acc": 81.0, + "transfer_atk_acc": 79.5 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 86.5, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 86.0, + "atk_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions 6a4Yn3RGVc. ", + "transfer_ori_acc": 83.0, + "transfer_atk_acc": 83.0 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 84.84, + "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 83.39, + "atk_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment' 6a4Yn3RGVc. ", + "transfer_ori_acc": 82.0, + "transfer_atk_acc": 82.5 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.12, + "origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 82.31, + "atk_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment' MQnugHcaoy. ", + "transfer_ori_acc": 82.0, + "transfer_atk_acc": 81.5 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 94.95, + "origin_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative'.", + "atk_acc": 94.5, + "atk_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative' XStsD2pUzu. ", + "transfer_ori_acc": 94.5, + "transfer_atk_acc": 97.0 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 95.41, + "origin_prompt": "Acting as a sentiment evaluator, identify if the given sentence is 'positive' or 'negative'.", + "atk_acc": 94.84, + "atk_prompt": "Acting as a sentiment evaluator, identify if the given sentence is 'positive' or 'negative' SFPCqpiClT. ", + "transfer_ori_acc": 96.5, + "transfer_atk_acc": 96.0 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 64.79, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 63.38, + "atk_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment' Cev0s49fbe. ", + "transfer_ori_acc": 66.19718309859155, + "transfer_atk_acc": 69.01408450704226 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 66.2, + "atk_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment' 5XeflW1ZJc. ", + "transfer_ori_acc": 60.56338028169014, + "transfer_atk_acc": 63.38028169014085 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 45.92, + "origin_prompt": "Answer the subsequent multiple-choice question about {} by picking the right option among 'A', 'B', 'C', or 'D'.", + "atk_acc": 45.04, + "atk_prompt": "Answer the subsequent multiple-choice question about {} by picking the right option among 'A', 'B', 'C', or 'D' KjPJJ2a7RB. ", + "transfer_ori_acc": 66.0, + "transfer_atk_acc": 66.0 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 46.99, + "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 45.04, + "atk_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D' SFPCqpiClT.", + "transfer_ori_acc": 65.5, + "transfer_atk_acc": 65.5 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 87.91, + "origin_prompt": "From the context, provide the most precise answer. If the answer is not in the context, respond with 'unanswerable'.", + "atk_acc": 87.05, + "atk_prompt": "From the context, provide the most precise answer. If the answer is not in the context, respond with 'unanswerable' 5yWbBXztUY.", + "transfer_ori_acc": 69.33640054239395, + "transfer_atk_acc": 68.59989260588603 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 87.05, + "origin_prompt": "Drawing upon your expertise in the context, determine the most suitable answer. If an answer isn't available, state 'unanswerable'.", + "atk_acc": 86.55, + "atk_prompt": "Drawing upon your expertise in the context, determine the most suitable answer. If an answer isn't available, state 'unanswerable' 5XeflW1ZJc.", + "transfer_ori_acc": 71.1631951530121, + "transfer_atk_acc": 74.10685231284573 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.22, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.", + "atk_acc": 0.22, + "atk_prompt": "Convert the following sentence from its original {} language to the target language LkF0FZxMZ4 {}. ", + "transfer_ori_acc": 0.23991880278182126, + "transfer_atk_acc": 0.2348130480000894 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.22, + "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}.", + "atk_acc": 0.21, + "atk_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to MQnugHcaoy {}. ", + "transfer_ori_acc": 0.244428238867859, + "transfer_atk_acc": 0.2280062687297454 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.31, + "origin_prompt": "Translate the following text from the source language {} to the target language {}.", + "atk_acc": 0.3, + "atk_prompt": "Translate the following text from the source language {} to the target language Cev0s49fbe {}. ", + "transfer_ori_acc": 0.3520628408924414, + "transfer_atk_acc": 0.35416969837419654 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.31, + "origin_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to {}.", + "atk_acc": 0.29, + "atk_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to OB4KVJzIft {}. ", + "transfer_ori_acc": 0.3486649402303781, + "transfer_atk_acc": 0.3424551550058589 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 17.5, + "origin_prompt": "Resolve the following mathematical question associated with {}.", + "atk_acc": 15.62, + "atk_prompt": "Resolve the following mathematical question associated with 6a4Yn3RGVc {}. ", + "transfer_ori_acc": 22.5, + "transfer_atk_acc": 24.375 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 17.5, + "origin_prompt": "Imagine being a math mentor and determine the solution to the following problem with {}.", + "atk_acc": 15.62, + "atk_prompt": "Imagine being a math mentor and determine the solution to the following problem with b74X5IVXQY {}. ", + "transfer_ori_acc": 22.5, + "transfer_atk_acc": 23.75 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_chatgpt/deepwordbug_0_shot.json b/results_transfer/google-flan-t5-large_chatgpt/deepwordbug_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..8be8ed0da03d2c6fece2236812dd22d2516ee02f --- /dev/null +++ b/results_transfer/google-flan-t5-large_chatgpt/deepwordbug_0_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 77.9, + "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable':", + "atk_acc": 31.1, + "atk_prompt": "Exapine the gievn sentene and dVecide if it is grammktically sound. Answer with either 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 77.0, + "transfer_atk_acc": 75.5 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 76.3, + "origin_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 36.5, + "atk_prompt": "sA an instrument for grammar evaluatiJn, cnsider the setnence and deterbmine if its grmamar is crorect, reYponding with 'acceptable' for corect gramOar or 'unacceptable' for incorGect rgammar:", + "transfer_ori_acc": 79.5, + "transfer_atk_acc": 80.0 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 82.35, + "origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 76.96, + "atk_prompt": "Compar the folhowing sentences and deWermine if they share the same semantic emaning by respondig with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 73.0, + "transfer_atk_acc": 76.0 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 82.6, + "origin_prompt": "In your capacity as a semantic comparison tool, examine the given sentences and decide if their meanings are identical, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 62.25, + "atk_prompt": "In your capacity as a seantic coGparison tolo, exaine the given esentences and decide if their meanings are identiacl, resYonding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 75.0, + "transfer_atk_acc": 69.0 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 89.8, + "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 38.6, + "atk_prompt": "Evlauate whyther the giGven cotext supporzs the answeQ to the quePtion by rseponding with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 78.0, + "transfer_atk_acc": 74.5 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 89.4, + "origin_prompt": "As a semantic interpreter, assess whether the provided context supports the answer to the given question and answer with 'entailment' or 'not_entailment'.", + "atk_acc": 2.1, + "atk_prompt": "sA a esmantic interpreter, aBssess whehter the proviedd contexYt spuports the answher to the gEven questin and answOr with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 68.0, + "transfer_atk_acc": 65.0 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 87.8, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 59.8, + "atk_prompt": "Can these tow statYements be cnosidered qeual in maening? AnsweI with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 81.0, + "transfer_atk_acc": 72.0 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 86.9, + "origin_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 81.4, + "atk_prompt": "Functioning as a question smiilarity eavluation toXl, analyze the givYen questions and decidae if they share the same meajning, responding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 81.5, + "transfer_atk_acc": 81.5 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 86.64, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 81.95, + "atk_prompt": "Aer the following two Ysentences entailment or not_entailment? nswer me with \"entailment\" or \"not_entailment\", just one word. ", + "transfer_ori_acc": 81.0, + "transfer_atk_acc": 83.5 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.84, + "origin_prompt": "While performing entailment analysis, classify the relationship between the provided sentences as 'entailment' or 'not_entailment':", + "atk_acc": 82.67, + "atk_prompt": "While performing entailment analysis, cassify the reiationship between the proviLded sentences as 'entailment' or 'not_entailment':", + "transfer_ori_acc": 79.0, + "transfer_atk_acc": 77.5 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 94.84, + "origin_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative':", + "atk_acc": 94.04, + "atk_prompt": "Analze the tone of this sattement and respond with either 'positive' or 'negative': ", + "transfer_ori_acc": 89.5, + "transfer_atk_acc": 90.0 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 95.3, + "origin_prompt": "Serving as a sentiment evaluation model, determine if the given statement is 'positive' or 'negative'. Classify:", + "atk_acc": 64.91, + "atk_prompt": "Servign as a sentimBnt envaluation model, Qetermine if the Iiven statemen is 'positive' or 'negative'. Classhfy: ", + "transfer_ori_acc": 92.0, + "transfer_atk_acc": 91.0 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 66.2, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 60.56, + "atk_prompt": "Are the follHwing Ntwo sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one wor. ", + "transfer_ori_acc": 66.19718309859155, + "transfer_atk_acc": 64.7887323943662 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment':", + "atk_acc": 57.75, + "atk_prompt": "As an entailment identfiication zystem, examine the connection between the flolowing sentences and respond with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 52.112676056338024, + "transfer_atk_acc": 50.70422535211267 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 45.74, + "origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 42.2, + "atk_prompt": "For each multiple-chotce question about {}, dentify the caorrect answedr by selccting 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 59.5, + "transfer_atk_acc": 63.5 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 44.86, + "origin_prompt": "Utilizing your expertise in {}, answer the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 41.13, + "atk_prompt": "Utilizing your expertise in {}, ansKer the following multiprle-choice question by ipcking 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 60.5, + "transfer_atk_acc": 61.0 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 87.55, + "origin_prompt": "Find the correct answer in the context provided. If an answer cannot be found, please respond with 'unanswerable'.", + "atk_acc": 85.32, + "atk_prompt": "Find the Dcorrect answer in the context provided. GIf an answer cannot be found, please erspond with 'unanswerable'.", + "transfer_ori_acc": 67.5632226901742, + "transfer_atk_acc": 66.07909004721805 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 88.05, + "origin_prompt": "As an expert with a deep understanding of the context, find the best answer. If the context doesn't include an answer, say 'unanswerable'.", + "atk_acc": 86.92, + "atk_prompt": "As an expert with a deep understanding of the context, fHind the best answer. If the context doesn't include an answer, say 'unanswerable'.", + "transfer_ori_acc": 64.66646500169827, + "transfer_atk_acc": 63.33777694425634 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.14, + "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}:", + "atk_acc": 0.08, + "atk_prompt": "Perorm a nachine translatiou for the providde sentecne, changing it from {} to {}:", + "transfer_ori_acc": 0.22160247790642423, + "transfer_atk_acc": 0.22247429357782236 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.14, + "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}:", + "atk_acc": 0.07, + "atk_prompt": "TakIing on the roel of a certifieFd trLanslator, erform a translaton of the guiven Ventence from {} to {}: ", + "transfer_ori_acc": 0.22524167015355148, + "transfer_atk_acc": 0.21936213420618 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.27, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}:", + "atk_acc": 0.12, + "atk_prompt": "ConvHert the folloiwng setnence from its origianl {} langudage to the trget languaoe {}:", + "transfer_ori_acc": 0.3252157829213561, + "transfer_atk_acc": 0.32616092622927834 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.27, + "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}:", + "atk_acc": 0.16, + "atk_prompt": "ks a qualifieA translator, covnert the given sentence from its original {} language to the target lanugage {}: ", + "transfer_ori_acc": 0.32916254916267745, + "transfer_atk_acc": 0.33080526346344613 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 16.25, + "origin_prompt": "Evaluate the following mathematical problem that includes {}:", + "atk_acc": 8.12, + "atk_prompt": "vEaluate the followinLg mtahematical problem that incldes {}:", + "transfer_ori_acc": 4.375, + "transfer_atk_acc": 5.625 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 13.12, + "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}:", + "atk_acc": 8.12, + "atk_prompt": "As a mathematics iestructor, calculate the answex to the following problem related to {}: ", + "transfer_ori_acc": 5.0, + "transfer_atk_acc": 4.375 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_chatgpt/deepwordbug_3_shot.json b/results_transfer/google-flan-t5-large_chatgpt/deepwordbug_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..59b723d512fe81e11a4ef3ac584a3932b63bb9cb --- /dev/null +++ b/results_transfer/google-flan-t5-large_chatgpt/deepwordbug_3_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 77.8, + "origin_prompt": "Is the provided sentence grammatically correct? Respond with 'Acceptable' or 'Unacceptable'.", + "atk_acc": 68.6, + "atk_prompt": "Is the provided semtence rammatically corrct? Respond with 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 81.0, + "transfer_atk_acc": 81.0 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 75.7, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.", + "atk_acc": 70.5, + "atk_prompt": "While hperforming grammNar anlaysis, classfiy the grammar of the foBlowing sentence as 'acceptable' for corrept grammgar or 'unacceptable' for incborrect grMammar. ", + "transfer_ori_acc": 82.0, + "transfer_atk_acc": 81.0 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 83.09, + "origin_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 78.92, + "atk_prompt": "Evaluate whether the two provided senetnces convey the same meaning by answering 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 67.0, + "transfer_atk_acc": 66.0 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 81.37, + "origin_prompt": "As a semantic comparison expert, evaluate the given pair of sentences and determine if they are 'equivalent' or 'not_equivalent'.", + "atk_acc": 76.72, + "atk_prompt": "As a semantic comparisKon experEt, evaluate the given pari of sentence and determine if they are 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 69.0, + "transfer_atk_acc": 66.5 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 89.3, + "origin_prompt": "Based on the provided context and question, decide if the information supports the answer by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 39.5, + "atk_prompt": "Baesd on the prohided conteMt and questino, dRcide if the infzrmation suppoLrts the aOnswer by resXonding with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 83.0, + "transfer_atk_acc": 80.0 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 88.8, + "origin_prompt": "In the capacity of a language evaluator, examine if the given context justifies the answer to the question and indicate your assessment with 'entailment' or 'not_entailment'.", + "atk_acc": 44.5, + "atk_prompt": "bn the cUapacity of a langtage valuator, Cxamine if the givcen contjxt jYustifies the anbwer to the qPuestion and idicate your assetssment with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 84.5, + "transfer_atk_acc": 79.0 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.1, + "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", + "atk_acc": 77.3, + "atk_prompt": "re the flolowing two questFons equivalent or not? Answe me with \"equivalent\" or \"not_equivalent\". ", + "transfer_ori_acc": 82.5, + "transfer_atk_acc": 83.0 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 86.5, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 81.8, + "atk_prompt": "As an instrument for question cotmparison evaluVtion, consider the questionPs and determbine if their maening is the same, responRing with 'equivalent' for similar questins or 'not_equivalent' for difserent questions. ", + "transfer_ori_acc": 83.0, + "transfer_atk_acc": 83.0 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 85.2, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 80.87, + "atk_prompt": "Aer the followig mwo sentences entailment or not_entailment? nswer me with \"entailment\" or \"not_entailment\", just noe word. ", + "transfer_ori_acc": 82.5, + "transfer_atk_acc": 82.5 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.12, + "origin_prompt": "Acting as an entailment detection instrument, determine if the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 81.59, + "atk_prompt": "Acting as an entailment detection instrument, dGetermine if the given pair of sentences demonstpates entailment or not_entailment. AVswer with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 83.5, + "transfer_atk_acc": 83.5 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 95.3, + "origin_prompt": "Considering the given phrase, would you say it carries a 'positive' or 'negative' connotation?", + "atk_acc": 93.92, + "atk_prompt": "Considering the given phraes, wouHd you Fay it carries a 'positive' or 'negative' connotavtion? ", + "transfer_ori_acc": 85.5, + "transfer_atk_acc": 87.0 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 95.41, + "origin_prompt": "Acting as a sentiment evaluator, identify if the given sentence is 'positive' or 'negative'.", + "atk_acc": 94.61, + "atk_prompt": "Acting as a seniment valuator, dientify if the giveIn sentence is 'positive' or 'negative'. ", + "transfer_ori_acc": 96.0, + "transfer_atk_acc": 96.0 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 64.79, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 54.93, + "atk_prompt": "Assses the connection between the followoing sentence and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 63.38028169014085, + "transfer_atk_acc": 61.97183098591549 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 56.34, + "atk_prompt": "In the capaciy of an entailment assessment system, inDdicate if the liQk between the followign sentences is 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 64.7887323943662, + "transfer_atk_acc": 69.01408450704226 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 46.81, + "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 43.44, + "atk_prompt": "In relation to the ultiple-choice question on {}, lease rovide the accuraet answer by choosieg 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 68.0, + "transfer_atk_acc": 64.5 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 46.99, + "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 43.44, + "atk_prompt": "Considering your familiarity with {}, attePd to the following multiple-hcoice question by pickiGng 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 65.5, + "transfer_atk_acc": 64.0 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 87.91, + "origin_prompt": "From the context, provide the most precise answer. If the answer is not in the context, respond with 'unanswerable'.", + "atk_acc": 86.55, + "atk_prompt": "Frmo the context, provide the most precise answer. If the answer is not in the context, respond with 'unanswerable'.", + "transfer_ori_acc": 68.83640054239395, + "transfer_atk_acc": 72.44751165350506 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 87.05, + "origin_prompt": "As a subject matter expert, extract the correct answer from the context. If an answer is not present, indicate 'unanswerable'.", + "atk_acc": 86.59, + "atk_prompt": "As a subject matter expert, extract the ocrrect anser from the contNext. If an answer is not present, indicate 'unanswerable'.", + "transfer_ori_acc": 73.51286878855532, + "transfer_atk_acc": 72.26636529205179 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.22, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.", + "atk_acc": 0.21, + "atk_prompt": "tonvert the following senvtence from its original {} language to the target language {}. ", + "transfer_ori_acc": 0.23976066777257665, + "transfer_atk_acc": 0.24198626199427362 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.22, + "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}.", + "atk_acc": 0.14, + "atk_prompt": "In the role of a langauge expert, perforF a machine kranslation for the pLovided senteSce, changing it from {} to {}. ", + "transfer_ori_acc": 0.24585246891494666, + "transfer_atk_acc": 0.2401658466423436 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.31, + "origin_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context.", + "atk_acc": 0.3, + "atk_prompt": "Translate the proviWded entence from {} to {} while maintaining the oriinal meanin and conEtext. ", + "transfer_ori_acc": 0.3480112011950064, + "transfer_atk_acc": 0.34622353749471374 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.31, + "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}.", + "atk_acc": 0.26, + "atk_prompt": "Taking on the roEle of a certifLed ltranslator, perofrm a trasnlation of the giDven senence from {} to {}. ", + "transfer_ori_acc": 0.3487924607341456, + "transfer_atk_acc": 0.3482747678030168 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 17.5, + "origin_prompt": "Figure out the solution for the following mathematical task with {}.", + "atk_acc": 15.62, + "atk_prompt": "Figure out the slution for the following mathemaitcal task with {}. ", + "transfer_ori_acc": 20.0, + "transfer_atk_acc": 21.25 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 17.5, + "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}.", + "atk_acc": 15.0, + "atk_prompt": "sA a mathematics instructor, calculate the anvwer to the following problem relDated to {}. ", + "transfer_ori_acc": 21.875, + "transfer_atk_acc": 21.25 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_chatgpt/stresstest_0_shot.json b/results_transfer/google-flan-t5-large_chatgpt/stresstest_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..9a8df5be1cf16c97a77375dd3697faf663145ad8 --- /dev/null +++ b/results_transfer/google-flan-t5-large_chatgpt/stresstest_0_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 78.0, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':", + "atk_acc": 63.5, + "atk_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable' and false is not true :", + "transfer_ori_acc": 76.0, + "transfer_atk_acc": 76.5 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 76.3, + "origin_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 62.9, + "atk_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar and true is true and true is true and true is true and true is true and true is true :", + "transfer_ori_acc": 79.5, + "transfer_atk_acc": 77.0 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 82.11, + "origin_prompt": "Do the meanings of these two statements align? Indicate your answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 29.66, + "atk_prompt": "Do the meanings of these two statements align? Indicate your answer with 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 74.0, + "transfer_atk_acc": 74.0 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 81.62, + "origin_prompt": "As a language comparison expert, examine the given pair of sentences and decide if their meanings align, answering with 'equivalent' or 'not_equivalent'.", + "atk_acc": 15.93, + "atk_prompt": "As a language comparison expert, examine the given pair of sentences and decide if their meanings align, answering with 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 72.5, + "transfer_atk_acc": 72.0 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 88.7, + "origin_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 78.6, + "atk_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 74.5, + "transfer_atk_acc": 74.5 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 89.2, + "origin_prompt": "As a textual analyst, examine if the given context logically implies the answer to the question and indicate your decision with 'entailment' or 'not_entailment'.", + "atk_acc": 57.2, + "atk_prompt": "As a textual analyst, examine if the given context logically implies the answer to the question and indicate your decision with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 64.5, + "transfer_atk_acc": 68.5 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.1, + "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", + "atk_acc": 24.0, + "atk_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent and false is not true \". ", + "transfer_ori_acc": 81.5, + "transfer_atk_acc": 81.0 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 86.9, + "origin_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions.", + "atk_acc": 86.4, + "atk_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 82.0, + "transfer_atk_acc": 83.0 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 86.64, + "origin_prompt": "Analyze the two provided sentences and decide if their relationship is 'entailment' or 'not_entailment'.", + "atk_acc": 43.32, + "atk_prompt": "Analyze the two provided sentences and decide if their relationship is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true .", + "transfer_ori_acc": 76.5, + "transfer_atk_acc": 80.0 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.84, + "origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment':", + "atk_acc": 38.27, + "atk_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true :", + "transfer_ori_acc": 78.0, + "transfer_atk_acc": 81.5 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 94.84, + "origin_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative':", + "atk_acc": 92.43, + "atk_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative' and true is true and true is true and true is true and true is true and true is true : ", + "transfer_ori_acc": 89.5, + "transfer_atk_acc": 94.5 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 95.3, + "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:", + "atk_acc": 93.81, + "atk_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify and false is not true : ", + "transfer_ori_acc": 92.0, + "transfer_atk_acc": 93.0 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 66.2, + "origin_prompt": "Analyze the two provided sentences and decide if their relationship is 'entailment' or 'not_entailment'.", + "atk_acc": 26.76, + "atk_prompt": "Analyze the two provided sentences and decide if their relationship is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true .", + "transfer_ori_acc": 56.33802816901409, + "transfer_atk_acc": 60.56338028169014 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 64.79, + "origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment':", + "atk_acc": 25.35, + "atk_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true :", + "transfer_ori_acc": 54.929577464788736, + "transfer_atk_acc": 53.52112676056338 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 45.74, + "origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 44.33, + "atk_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D' and false is not true . ", + "transfer_ori_acc": 60.5, + "transfer_atk_acc": 56.49999999999999 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 44.86, + "origin_prompt": "Utilizing your expertise in {}, answer the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 43.97, + "atk_prompt": "Utilizing your expertise in {}, answer the following multiple-choice question by picking 'A', 'B', 'C', or 'D' and false is not true .", + "transfer_ori_acc": 60.5, + "transfer_atk_acc": 55.50000000000001 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 87.55, + "origin_prompt": "Find the correct answer in the context provided. If an answer cannot be found, please respond with 'unanswerable'.", + "atk_acc": 86.82, + "atk_prompt": "Find the correct answer in the context provided. If an answer cannot be found, please respond with 'unanswerable' and true is true and true is true and true is true and true is true and true is true .", + "transfer_ori_acc": 68.38822269017423, + "transfer_atk_acc": 70.73369279024959 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 88.18, + "origin_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable'.", + "atk_acc": 86.72, + "atk_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable' and true is true and true is true and true is true and true is true and true is true .", + "transfer_ori_acc": 60.4016548247751, + "transfer_atk_acc": 58.568240486562 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.14, + "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}:", + "atk_acc": 0.05, + "atk_prompt": "Please perform a translation of the given sentence, converting it from {} to and true is true and true is true and true is true and true is true and true is true {}:", + "transfer_ori_acc": 0.21840008360286153, + "transfer_atk_acc": 0.22041144057281334 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.14, + "origin_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to {}:", + "atk_acc": 0.04, + "atk_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to and true is true and true is true and true is true and true is true and true is true {}: ", + "transfer_ori_acc": 0.22335240384171567, + "transfer_atk_acc": 0.21914869170995976 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.27, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}:", + "atk_acc": 0.18, + "atk_prompt": "Convert the following sentence from its original {} language to the target language and true is true and true is true and true is true and true is true and true is true {}:", + "transfer_ori_acc": 0.32677887576498565, + "transfer_atk_acc": 0.33177794477245837 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.27, + "origin_prompt": "Assuming the role of an expert language translator, translate the given text from {} to {}:", + "atk_acc": 0.16, + "atk_prompt": "Assuming the role of an expert language translator, translate the given text from {} to and false is not true {}: ", + "transfer_ori_acc": 0.33176540300237933, + "transfer_atk_acc": 0.30251730003047606 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 13.75, + "origin_prompt": "Work out the answer to this math problem featuring {}:", + "atk_acc": 11.88, + "atk_prompt": "Work out the answer to this math problem featuring and false is not true {}:", + "transfer_ori_acc": 6.25, + "transfer_atk_acc": 8.125 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 13.12, + "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}:", + "atk_acc": 11.25, + "atk_prompt": "As a mathematics instructor, calculate the answer to the following problem related to and false is not true {}: ", + "transfer_ori_acc": 5.0, + "transfer_atk_acc": 5.625 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_chatgpt/stresstest_3_shot.json b/results_transfer/google-flan-t5-large_chatgpt/stresstest_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..f308e94b14caae797e9e4c246cce621719c75d18 --- /dev/null +++ b/results_transfer/google-flan-t5-large_chatgpt/stresstest_3_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 77.8, + "origin_prompt": "Is the provided sentence grammatically correct? Respond with 'Acceptable' or 'Unacceptable'.", + "atk_acc": 75.5, + "atk_prompt": "Is the provided sentence grammatically correct? Respond with 'Acceptable' or 'Unacceptable' and false is not true . ", + "transfer_ori_acc": 81.0, + "transfer_atk_acc": 80.0 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 75.8, + "origin_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.", + "atk_acc": 75.2, + "atk_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar and false is not true . ", + "transfer_ori_acc": 81.5, + "transfer_atk_acc": 79.0 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 83.09, + "origin_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 24.51, + "atk_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 68.5, + "transfer_atk_acc": 69.5 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 80.15, + "origin_prompt": "As a language comparison expert, examine the given pair of sentences and decide if their meanings align, answering with 'equivalent' or 'not_equivalent'.", + "atk_acc": 17.4, + "atk_prompt": "As a language comparison expert, examine the given pair of sentences and decide if their meanings align, answering with 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 70.5, + "transfer_atk_acc": 72.5 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 87.8, + "origin_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 79.4, + "atk_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 82.0, + "transfer_atk_acc": 81.5 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 88.8, + "origin_prompt": "In the capacity of a language evaluator, examine if the given context justifies the answer to the question and indicate your assessment with 'entailment' or 'not_entailment'.", + "atk_acc": 68.9, + "atk_prompt": "In the capacity of a language evaluator, examine if the given context justifies the answer to the question and indicate your assessment with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 86.0, + "transfer_atk_acc": 83.5 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.1, + "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", + "atk_acc": 23.4, + "atk_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent and false is not true \". ", + "transfer_ori_acc": 83.0, + "transfer_atk_acc": 83.5 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 86.5, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 86.4, + "atk_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions and true is true . ", + "transfer_ori_acc": 83.0, + "transfer_atk_acc": 82.5 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 84.84, + "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 43.68, + "atk_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 82.0, + "transfer_atk_acc": 82.0 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.12, + "origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 36.82, + "atk_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 82.5, + "transfer_atk_acc": 82.0 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 95.3, + "origin_prompt": "Considering the given phrase, would you say it carries a 'positive' or 'negative' connotation?", + "atk_acc": 94.61, + "atk_prompt": "Considering the given phrase, would you say it carries a 'positive' or 'negative' connotation and false is not true ? ", + "transfer_ori_acc": 85.5, + "transfer_atk_acc": 90.5 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 95.41, + "origin_prompt": "Acting as a sentiment evaluator, identify if the given sentence is 'positive' or 'negative'.", + "atk_acc": 95.07, + "atk_prompt": "Acting as a sentiment evaluator, identify if the given sentence is 'positive' or 'negative' and true is true . ", + "transfer_ori_acc": 96.0, + "transfer_atk_acc": 96.0 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 66.2, + "origin_prompt": "Analyze the two provided sentences and decide if their relationship is 'entailment' or 'not_entailment'.", + "atk_acc": 35.21, + "atk_prompt": "Analyze the two provided sentences and decide if their relationship is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 67.6056338028169, + "transfer_atk_acc": 69.01408450704226 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 23.94, + "atk_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 63.38028169014085, + "transfer_atk_acc": 66.19718309859155 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 45.92, + "origin_prompt": "Answer the subsequent multiple-choice question about {} by picking the right option among 'A', 'B', 'C', or 'D'.", + "atk_acc": 44.86, + "atk_prompt": "Answer the subsequent multiple-choice question about {} by picking the right option among 'A', 'B', 'C', or 'D' and false is not true . ", + "transfer_ori_acc": 66.0, + "transfer_atk_acc": 63.5 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 46.63, + "origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 44.86, + "atk_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D' and false is not true .", + "transfer_ori_acc": 66.5, + "transfer_atk_acc": 60.0 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 87.91, + "origin_prompt": "From the context, provide the most precise answer. If the answer is not in the context, respond with 'unanswerable'.", + "atk_acc": 87.05, + "atk_prompt": "From the context, provide the most precise answer. If the answer is not in the context, respond with 'unanswerable' and true is true and true is true and true is true and true is true and true is true .", + "transfer_ori_acc": 69.33640054239395, + "transfer_atk_acc": 73.87358029926682 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 87.32, + "origin_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable'.", + "atk_acc": 86.65, + "atk_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable' and true is true and true is true and true is true and true is true and true is true .", + "transfer_ori_acc": 72.15127341286454, + "transfer_atk_acc": 73.2198520564432 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.22, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.", + "atk_acc": 0.22, + "atk_prompt": "Convert the following sentence from its original {} language to the target language and true is true {}. ", + "transfer_ori_acc": 0.2398168056394942, + "transfer_atk_acc": 0.24202065636113176 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.22, + "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}.", + "atk_acc": 0.21, + "atk_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to and false is not true {}. ", + "transfer_ori_acc": 0.24590702251240149, + "transfer_atk_acc": 0.2367618745208918 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.31, + "origin_prompt": "Translate the following text from the source language {} to the target language {}.", + "atk_acc": 0.3, + "atk_prompt": "Translate the following text from the source language {} to the target language and false is not true {}. ", + "transfer_ori_acc": 0.35552251800626866, + "transfer_atk_acc": 0.35122697515817164 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.31, + "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}.", + "atk_acc": 0.3, + "atk_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to and true is true and true is true and true is true and true is true and true is true {}. ", + "transfer_ori_acc": 0.3495284813421838, + "transfer_atk_acc": 0.34660813120050193 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 17.5, + "origin_prompt": "Find the solution for this mathematical challenge with {}.", + "atk_acc": 16.25, + "atk_prompt": "Find the solution for this mathematical challenge with and false is not true {}. ", + "transfer_ori_acc": 22.5, + "transfer_atk_acc": 23.125 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 18.12, + "origin_prompt": "Embodying a mathematics advisor, find the answer to the following math problem with {}.", + "atk_acc": 16.25, + "atk_prompt": "Embodying a mathematics advisor, find the answer to the following math problem with and false is not true {}. ", + "transfer_ori_acc": 22.5, + "transfer_atk_acc": 22.5 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_chatgpt/textbugger_0_shot.json b/results_transfer/google-flan-t5-large_chatgpt/textbugger_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..21065bcf4f1c3940ecb6a1d3df21a6393f3843b2 --- /dev/null +++ b/results_transfer/google-flan-t5-large_chatgpt/textbugger_0_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 77.9, + "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable':", + "atk_acc": 32.1, + "atk_prompt": "Examining the given condemnation and decide if it is pronoun sounds. Answer with neither 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 77.0, + "transfer_atk_acc": 79.5 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 76.3, + "origin_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 46.8, + "atk_prompt": "Because an devices for grammar assessment, consdier the punishments and determining if its grammar is correct, responԁing with 'acceptable' for correct grammar or 'unacceptable' for incorrect pronunciation:", + "transfer_ori_acc": 79.5, + "transfer_atk_acc": 73.5 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 82.11, + "origin_prompt": "Do the meanings of these two statements align? Indicate your answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 75.25, + "atk_prompt": "Do the meanings of these two statemеnts alіgn? Denotes your answer with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 74.0, + "transfer_atk_acc": 71.0 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 81.62, + "origin_prompt": "As a language comparison expert, examine the given pair of sentences and decide if their meanings align, answering with 'equivalent' or 'not_equivalent'.", + "atk_acc": 74.26, + "atk_prompt": "As a language comparison e×pert, examine the given p air of sentences and decide if their meaings alin, answering with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 73.5, + "transfer_atk_acc": 71.5 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 89.8, + "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 56.8, + "atk_prompt": "Evɑluate lf the given context supports the replying to the question by answer with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 78.0, + "transfer_atk_acc": 76.0 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 89.4, + "origin_prompt": "As a semantic interpreter, assess whether the provided context supports the answer to the given question and answer with 'entailment' or 'not_entailment'.", + "atk_acc": 50.8, + "atk_prompt": "Since a semantic interpreter, as sess lf the provіded background supports the answer to the ɡiven question and answer with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 68.0, + "transfer_atk_acc": 59.0 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.1, + "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", + "atk_acc": 78.6, + "atk_prompt": "Are the following tԝo matters equivalent or not? Replying me with \"equivalent\" or \"not_equivalent\". ", + "transfer_ori_acc": 81.0, + "transfer_atk_acc": 80.5 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 87.7, + "origin_prompt": "Acting as a question equivalence instrument, determine if the provided questions are equivalent in meaning, answering with 'equivalent' for similar questions or 'not_equivalent' for dissimilar ones.", + "atk_acc": 82.5, + "atk_prompt": "Actinɡ as a question equivaleոce inst rument, determine if the prvided questions are equivalent in mean, answering with 'equivalent' for similar questions or 'not_equivalent' for dіssimilar ones. ", + "transfer_ori_acc": 80.5, + "transfer_atk_acc": 81.0 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 86.64, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 82.31, + "atk_prompt": "Arе the following to sentences entailment or not_entailment? Respond me with \"entailment\" or \"not_entailment\", just one word. ", + "transfer_ori_acc": 81.0, + "transfer_atk_acc": 82.5 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 85.56, + "origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 82.67, + "atk_prompt": "As an instrument for entailment evaluation, consider the t wo sentences and determine if their relationѕhip is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 81.0, + "transfer_atk_acc": 81.5 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 94.95, + "origin_prompt": "Given the context of this text, indicate if the emotion conveyed is 'positive' or 'negative':", + "atk_acc": 76.26, + "atk_prompt": "Giveո the background of this text, depicts if the emotions transmitted is 'positive' or 'negative': ", + "transfer_ori_acc": 86.0, + "transfer_atk_acc": 0.0 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 95.3, + "origin_prompt": "Serving as a sentiment evaluation model, determine if the given statement is 'positive' or 'negative'. Classify:", + "atk_acc": 88.53, + "atk_prompt": "Serving as a emotion evluation models, determining if the conferred state ment is 'positive' or 'negative'. Classify: ", + "transfer_ori_acc": 92.5, + "transfer_atk_acc": 89.0 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 66.2, + "origin_prompt": "Analyze the two provided sentences and decide if their relationship is 'entailment' or 'not_entailment'.", + "atk_acc": 57.75, + "atk_prompt": "Analyse the two provided sentences and dеcide if their relations is 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 56.33802816901409, + "transfer_atk_acc": 49.29577464788733 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment':", + "atk_acc": 57.75, + "atk_prompt": "As an entailment identification system, investigated the connection between the subsequently sentences and replying with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 52.112676056338024, + "transfer_atk_acc": 53.52112676056338 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 45.74, + "origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 43.26, + "atk_prompt": "For each multip le-choice question about {}, identify the rectify answer by selеcting 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 59.5, + "transfer_atk_acc": 61.0 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 44.86, + "origin_prompt": "Utilizing your expertise in {}, answer the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 40.96, + "atk_prompt": "Utilizіng your expertise in {}, answeⲅ the following multiple-choice question by collection 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 60.5, + "transfer_atk_acc": 61.5 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 87.55, + "origin_prompt": "Find the correct answer in the context provided. If an answer cannot be found, please respond with 'unanswerable'.", + "atk_acc": 85.82, + "atk_prompt": "Find the corrected replying in the ϲontext given. If an answer cannot be found, please respond with 'unanswerable'.", + "transfer_ori_acc": 68.00635455830609, + "transfer_atk_acc": 57.73809370827644 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 88.05, + "origin_prompt": "As an authority on the context, locate the most accurate answer. If the context doesn't contain the answer, mention 'unanswerable'.", + "atk_acc": 81.52, + "atk_prompt": "As an authoⲅity on the context, locate the most meticulous respond. Whether the context doesn't contain the answer, mentioո 'unanswerable'.", + "transfer_ori_acc": 58.10976445922912, + "transfer_atk_acc": 40.24112357998453 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.14, + "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}:", + "atk_acc": 0.13, + "atk_prompt": "Please perfоrm a trɑnslation of the given sentence, converting it from {} to {}:", + "transfer_ori_acc": 0.21840008360286153, + "transfer_atk_acc": 0.224537727441171 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.14, + "origin_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to {}:", + "atk_acc": 0.12, + "atk_prompt": "In the capacities of a proficient traոslator, рlease change the following sentence from {} to {}: ", + "transfer_ori_acc": 0.2231085456603854, + "transfer_atk_acc": 0.2216494294567663 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.27, + "origin_prompt": "Accurately translate the sentence from {} to {}, ensuring the meaning remains intact:", + "atk_acc": 0.17, + "atk_prompt": "Scrupulously translate the sentence from {} to {}, ensuring the meani ng remains intact:", + "transfer_ori_acc": 0.32498606395104107, + "transfer_atk_acc": 0.32744615484476725 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.27, + "origin_prompt": "Assuming the role of an expert language translator, translate the given text from {} to {}:", + "atk_acc": 0.19, + "atk_prompt": "Asѕuming the role of an experts la nguage translator, translate the ɡiven text from {} to {}: ", + "transfer_ori_acc": 0.33099601433996206, + "transfer_atk_acc": 0.3330518884351019 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 16.25, + "origin_prompt": "Evaluate the following mathematical problem that includes {}:", + "atk_acc": 10.0, + "atk_prompt": "Evaluate the following mathematical problem that inϲludes {}:", + "transfer_ori_acc": 4.375, + "transfer_atk_acc": 5.0 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 13.12, + "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}:", + "atk_acc": 9.38, + "atk_prompt": "As a mathematics instructoⲅ, calculate the answers to the following problem related to {}: ", + "transfer_ori_acc": 5.0, + "transfer_atk_acc": 5.0 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_chatgpt/textbugger_3_shot.json b/results_transfer/google-flan-t5-large_chatgpt/textbugger_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..af5463c8c8726c5a81e72cd167005d5d891dceff --- /dev/null +++ b/results_transfer/google-flan-t5-large_chatgpt/textbugger_3_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 77.9, + "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable'.", + "atk_acc": 54.9, + "atk_prompt": "Examine the yielded sentence and decide if it is pronouns audio. Answer with neither 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 81.0, + "transfer_atk_acc": 75.0 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 76.4, + "origin_prompt": "Acting as a grammar detection instrument, determine if the provided sentence is grammatically sound, answering with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.", + "atk_acc": 70.0, + "atk_prompt": "Acting as a grammatical detection instruments, determining if the provided condemnation is grammatically sоund, answer with 'acceptable' for corⲅect grammatical or 'unacceptable' for incоrrect grɑmmar. ", + "transfer_ori_acc": 79.5, + "transfer_atk_acc": 80.5 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 82.6, + "origin_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent'.", + "atk_acc": 78.68, + "atk_prompt": "Can the given sntences be considered semantically idеntical? Please reply with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 71.0, + "transfer_atk_acc": 72.0 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 80.15, + "origin_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 75.49, + "atk_prompt": "In the role of a sentence compraison psychoanalyst, assess the provided sentences and indicatе if they convey the same meani ng by selecting 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 71.0, + "transfer_atk_acc": 70.0 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 88.8, + "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 62.1, + "atk_prompt": "Evluate both the giѵen context supports the answer to the question by responding with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 84.5, + "transfer_atk_acc": 82.0 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 88.8, + "origin_prompt": "In the capacity of a language evaluator, examine if the given context justifies the answer to the question and indicate your assessment with 'entailment' or 'not_entailment'.", + "atk_acc": 56.0, + "atk_prompt": "In the capacity of a language evaluator, examining if the giѵen backgrounds ϳustifies the answer to the matter and indicates your assessment with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 84.5, + "transfer_atk_acc": 79.0 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.1, + "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", + "atk_acc": 80.1, + "atk_prompt": "Are the following tԝo subjects equivalent or not? Anser me with \"equivalent\" or \"not_equivalent\". ", + "transfer_ori_acc": 83.0, + "transfer_atk_acc": 83.0 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 86.5, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 75.7, + "atk_prompt": "Since an instrument for question compares evaluation, consider the problems and identify if their connotation is the same, respondinɡ with 'equivalent' for similar question or 'not_equivalent' for assorted issues. ", + "transfer_ori_acc": 83.0, + "transfer_atk_acc": 82.0 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 85.2, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 81.59, + "atk_prompt": "Be the fоllowing t wo sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word. ", + "transfer_ori_acc": 83.0, + "transfer_atk_acc": 84.0 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.12, + "origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 81.59, + "atk_prompt": "For the capabilities of an entailment appraisals system, indicate if the link between the fo llowing sentences is 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 82.5, + "transfer_atk_acc": 82.5 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 94.95, + "origin_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative'.", + "atk_acc": 94.15, + "atk_prompt": "Analyze the tone of this declaration and cater with or 'positive' or 'negative'. ", + "transfer_ori_acc": 95.0, + "transfer_atk_acc": 89.5 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 95.41, + "origin_prompt": "Acting as a sentiment evaluator, identify if the given sentence is 'positive' or 'negative'.", + "atk_acc": 93.92, + "atk_prompt": "Acting as a ambience evaluatoⲅ, identifies if the awarded sentence is 'positive' or 'negative'. ", + "transfer_ori_acc": 96.0, + "transfer_atk_acc": 95.0 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 64.79, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 57.75, + "atk_prompt": "Assess the connecting between the subsequent sentence and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 67.6056338028169, + "transfer_atk_acc": 60.56338028169014 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "Functioning as an entailment evaluation tool, analyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment'.", + "atk_acc": 57.75, + "atk_prompt": "Functioning as an entailment evaluation tool, analyze the furnished sentences and decide if their relationshiр is 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 66.19718309859155, + "transfer_atk_acc": 66.19718309859155 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 46.81, + "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 43.62, + "atk_prompt": "In relation to the mlutiple-choice question on {}, pelase provide the meticulous answer by opt 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 67.0, + "transfer_atk_acc": 65.0 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 46.99, + "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 43.97, + "atk_prompt": "Considering your knowledge with {}, attend to the following multip le-choice questions by рicking 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 65.5, + "transfer_atk_acc": 66.0 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 87.15, + "origin_prompt": "Please extract the most appropriate answer from the context. If an answer is not present, indicate 'unanswerable'.", + "atk_acc": 86.55, + "atk_prompt": "Please extract the most appropriate answer from the context. Though an answer is not present, indicate 'unanswerable'.", + "transfer_ori_acc": 72.2002865977473, + "transfer_atk_acc": 72.33865656111726 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 87.05, + "origin_prompt": "As a subject matter expert, extract the correct answer from the context. If an answer is not present, indicate 'unanswerable'.", + "atk_acc": 85.95, + "atk_prompt": "As a subjected matter expeⲅt, extract the correct an swer from the context. If an answer is not present, indicate 'unanswerable'.", + "transfer_ori_acc": 73.34620212188865, + "transfer_atk_acc": 72.26280110261804 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.22, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.", + "atk_acc": 0.21, + "atk_prompt": "Convret the following sentence from its original {} language to the taⲅget language {}. ", + "transfer_ori_acc": 0.2397640641843048, + "transfer_atk_acc": 0.2366160728922804 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.22, + "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}.", + "atk_acc": 0.21, + "atk_prompt": "Since a qualifid translator, coոvert the given sentence from its o riginal {} language to the target language {}. ", + "transfer_ori_acc": 0.23953731633418865, + "transfer_atk_acc": 0.24087339581317463 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.31, + "origin_prompt": "Translate the following text from the source language {} to the target language {}.", + "atk_acc": 0.29, + "atk_prompt": "Translate the subsequent text from the source tongue {} to the target lanugage {}. ", + "transfer_ori_acc": 0.35536362257268445, + "transfer_atk_acc": 0.3515214595339375 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.31, + "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}.", + "atk_acc": 0.28, + "atk_prompt": "Taking on the role of a certified translator, performed a translator of the giѵen sentencе from {} to {}. ", + "transfer_ori_acc": 0.34766574693711777, + "transfer_atk_acc": 0.35227980456354985 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 17.5, + "origin_prompt": "Resolve the following mathematical question associated with {}.", + "atk_acc": 15.62, + "atk_prompt": "Reslove the following math question associated with {}. ", + "transfer_ori_acc": 22.5, + "transfer_atk_acc": 23.125 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 17.5, + "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}.", + "atk_acc": 15.62, + "atk_prompt": "As a mathematics trainers, calculate the respond to the following problem related to {}. ", + "transfer_ori_acc": 21.25, + "transfer_atk_acc": 23.125 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_chatgpt/textfooler_0_shot.json b/results_transfer/google-flan-t5-large_chatgpt/textfooler_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0bd3e5c339fd3aa719275bc76dc5dca1c5dfa0 --- /dev/null +++ b/results_transfer/google-flan-t5-large_chatgpt/textfooler_0_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 77.5, + "origin_prompt": "Determine if the grammar of the given sentence is 'Acceptable' or 'Unacceptable':", + "atk_acc": 31.0, + "atk_prompt": "Establishes if the jargon of the made punishments is 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 75.5, + "transfer_atk_acc": 28.499999999999996 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 76.3, + "origin_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 30.8, + "atk_prompt": "Since an apparatus for language measured, envisage the convicts and identify if its language is correct, reaction with 'acceptable' for accurate jargon or 'unacceptable' for unsound pronunciation:", + "transfer_ori_acc": 79.5, + "transfer_atk_acc": 71.0 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 82.11, + "origin_prompt": "Do these two sentences have the same underlying meaning? Respond with 'equivalent' or 'not_equivalent'.", + "atk_acc": 53.68, + "atk_prompt": "Ca these two condemning ont the same main meaning? Reactions with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 73.0, + "transfer_atk_acc": 46.5 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 81.62, + "origin_prompt": "As a language comparison expert, examine the given pair of sentences and decide if their meanings align, answering with 'equivalent' or 'not_equivalent'.", + "atk_acc": 12.01, + "atk_prompt": "As a language compared professional, reviews the given torque of sentences and picked if their sens unify, address with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 73.0, + "transfer_atk_acc": 75.5 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 89.8, + "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 41.4, + "atk_prompt": "Estimation whether the gave backgrounds perpetuates the answered to the trouble by responses with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 77.5, + "transfer_atk_acc": 64.5 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 89.4, + "origin_prompt": "As a semantic interpreter, assess whether the provided context supports the answer to the given question and answer with 'entailment' or 'not_entailment'.", + "atk_acc": 3.3, + "atk_prompt": "Iike a semantic performers, valuing whether the afforded ambient perpetuates the answer to the rendered item and cope with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 67.0, + "transfer_atk_acc": 2.0 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 87.7, + "origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 18.9, + "atk_prompt": "Valuing whether the later languages are comparable in sinn by cope 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 82.0, + "transfer_atk_acc": 42.0 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 86.9, + "origin_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions.", + "atk_acc": 73.8, + "atk_prompt": "About the rol of a question likeness analyzer, examine the mean of the conferred topic and identifying if they are equivalent, retaliated with 'equivalent' for equivalent trouble or 'not_equivalent' for other topics. ", + "transfer_ori_acc": 81.5, + "transfer_atk_acc": 77.5 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 86.64, + "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 80.87, + "atk_prompt": "Portrays if the login between the following punishments is 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 76.5, + "transfer_atk_acc": 74.5 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.84, + "origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment':", + "atk_acc": 79.06, + "atk_prompt": "About the capacity of an entailment weighed system, specified if the link between the following phrases is 'entailment' or 'not_entailment':", + "transfer_ori_acc": 78.0, + "transfer_atk_acc": 74.0 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 94.95, + "origin_prompt": "Given the context of this text, indicate if the emotion conveyed is 'positive' or 'negative':", + "atk_acc": 84.29, + "atk_prompt": "Made the background of this version, depicts if the passion recharged is 'positive' or 'negative': ", + "transfer_ori_acc": 86.5, + "transfer_atk_acc": 34.5 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 95.3, + "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:", + "atk_acc": 25.46, + "atk_prompt": "Functions as a emotion defining instrumentation, valuing if the next voicing is 'positive' or 'negative'. Classification: ", + "transfer_ori_acc": 92.0, + "transfer_atk_acc": 62.5 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 66.2, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 56.34, + "atk_prompt": "Be the following two phrases entailment or not_entailment? Responded me with \"entailment\" or \"not_entailment\", just one verb. ", + "transfer_ori_acc": 64.7887323943662, + "transfer_atk_acc": 61.97183098591549 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 64.79, + "origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment':", + "atk_acc": 56.34, + "atk_prompt": "Among the capacity of an entailment analysis system, indicate if the attach between the subsequently sentences is 'entailment' or 'not_entailment':", + "transfer_ori_acc": 56.33802816901409, + "transfer_atk_acc": 53.52112676056338 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 45.74, + "origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 37.06, + "atk_prompt": "By any multiple-choice question about {}, locate the corrected riposte by akhtar 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 60.0, + "transfer_atk_acc": 57.99999999999999 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 44.86, + "origin_prompt": "Utilizing your expertise in {}, answer the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 40.96, + "atk_prompt": "Utilise your expertise in {}, meet the next multiple-choice question by collection 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 60.0, + "transfer_atk_acc": 62.5 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 87.65, + "origin_prompt": "Please derive the most fitting answer from the context. If there isn't an answer in the context, respond with 'unanswerable'.", + "atk_acc": 84.09, + "atk_prompt": "Please subtract the most adjust answered from the context. Until there isn't an replied in the context, answered with 'unanswerable'.", + "transfer_ori_acc": 68.9823963074465, + "transfer_atk_acc": 70.0222411877797 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 88.05, + "origin_prompt": "As an authority on the context, locate the most accurate answer. If the context doesn't contain the answer, mention 'unanswerable'.", + "atk_acc": 77.31, + "atk_prompt": "As an authority on the context, find the most proper rebuttal. Though the context doesn't incorporated the answering, speaks 'unanswerable'.", + "transfer_ori_acc": 58.339251638716306, + "transfer_atk_acc": 13.16285232916677 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.14, + "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}:", + "atk_acc": 0.13, + "atk_prompt": "Please accomplishing a translators of the given judging, converting it from {} to {}:", + "transfer_ori_acc": 0.21831949334978593, + "transfer_atk_acc": 0.2239880425221272 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.14, + "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}:", + "atk_acc": 0.06, + "atk_prompt": "Under the features of a parlance specializing, realization a typewriter translator for the envisaged chastisement, altering it from {} to {}: ", + "transfer_ori_acc": 0.21989352460304576, + "transfer_atk_acc": 0.21750678591307532 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.27, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}:", + "atk_acc": 0.11, + "atk_prompt": "Transformations the later sanctions from its preliminary {} linguistics to the fated parlance {}:", + "transfer_ori_acc": 0.3247743039495714, + "transfer_atk_acc": 0.32716823822221747 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.27, + "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}:", + "atk_acc": 0.15, + "atk_prompt": "Under the role of a language skilled, performed a machine interpreters for the furnished condemned, transform it from {} to {}: ", + "transfer_ori_acc": 0.3345754849723755, + "transfer_atk_acc": 0.34235722522451423 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 13.75, + "origin_prompt": "Work out the answer to this math problem featuring {}:", + "atk_acc": 1.88, + "atk_prompt": "Work out the address to this math problem idiosyncratic {}:", + "transfer_ori_acc": 6.25, + "transfer_atk_acc": 6.25 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 13.12, + "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}:", + "atk_acc": 1.25, + "atk_prompt": "As a mathematics prof, calculate the address to the following problem related to {}: ", + "transfer_ori_acc": 5.0, + "transfer_atk_acc": 4.375 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_chatgpt/textfooler_3_shot.json b/results_transfer/google-flan-t5-large_chatgpt/textfooler_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..d1090f2c793414656dff76c786427a9b8d31d5c6 --- /dev/null +++ b/results_transfer/google-flan-t5-large_chatgpt/textfooler_3_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 77.8, + "origin_prompt": "Is the provided sentence grammatically correct? Respond with 'Acceptable' or 'Unacceptable'.", + "atk_acc": 70.3, + "atk_prompt": "Become the brought sentence grammatically correction? Reactions with 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 81.0, + "transfer_atk_acc": 80.5 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 75.7, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.", + "atk_acc": 70.5, + "atk_prompt": "If performing pronunciation analyzing, classify the grammar of the following condemnation as 'acceptable' for adequate jargon or 'unacceptable' for misspelled dictionary. ", + "transfer_ori_acc": 81.5, + "transfer_atk_acc": 83.0 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 83.09, + "origin_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 78.92, + "atk_prompt": "Estimation whether the two provided convictions despatch the same meaning by answers 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 67.5, + "transfer_atk_acc": 69.5 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 80.15, + "origin_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 75.25, + "atk_prompt": "In the role of a sentence likened psychoanalyst, assess the awarded sentences and indicate if they convey the same feeling by selecting 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 71.0, + "transfer_atk_acc": 67.5 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 88.8, + "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 36.0, + "atk_prompt": "Valuing whether the bestowed framework perpetuates the responded to the tura by retaliated with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 83.5, + "transfer_atk_acc": 74.0 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 88.8, + "origin_prompt": "In the capacity of a language evaluator, examine if the given context justifies the answer to the question and indicate your assessment with 'entailment' or 'not_entailment'.", + "atk_acc": 29.8, + "atk_prompt": "Of the jurisdictional of a phraseology evaluator, exams if the handed regard vindicated the solution to the matter and clarified your examinations with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 85.5, + "transfer_atk_acc": 80.0 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.1, + "origin_prompt": "textfooler", + "atk_acc": 70.9, + "atk_prompt": "Become the farther two subjects equivalent or not? Retaliated me with \"equivalent\" or \"not_equivalent\". ", + "transfer_ori_acc": 81.0, + "transfer_atk_acc": 81.5 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 86.5, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 63.4, + "atk_prompt": "As an tool for topic relation valuing, look the theme and identify if their connotation is the same, retaliated with 'equivalent' for similar question or 'not_equivalent' for other trouble. ", + "transfer_ori_acc": 83.0, + "transfer_atk_acc": 83.0 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 84.84, + "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 80.51, + "atk_prompt": "Defined whether the made peers of sentences showings entailment or not_entailment. Reaction with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 82.0, + "transfer_atk_acc": 82.0 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.12, + "origin_prompt": "Acting as an entailment detection instrument, determine if the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 79.78, + "atk_prompt": "Acting as an entailment detects instrument, defined if the given torque of sentences showcases entailment or not_entailment. Replies with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 83.5, + "transfer_atk_acc": 83.0 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 94.95, + "origin_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative'.", + "atk_acc": 94.27, + "atk_prompt": "Explore the tone of this affirmations and react with either 'positive' or 'negative'. ", + "transfer_ori_acc": 95.0, + "transfer_atk_acc": 96.0 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 95.53, + "origin_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement.", + "atk_acc": 94.04, + "atk_prompt": "In the role of a emotions exploring toolbox, respond with 'positive' or 'negative' to sorted this affirmations. ", + "transfer_ori_acc": 97.0, + "transfer_atk_acc": 96.5 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 64.79, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 54.93, + "atk_prompt": "Assess the connecting between the subsequent phrases and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 64.7887323943662, + "transfer_atk_acc": 64.7887323943662 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "Functioning as an entailment evaluation tool, analyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment'.", + "atk_acc": 56.34, + "atk_prompt": "Functioning as an entailment evaluation tool, analyze the afforded sentences and chosen if their liaise is 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 64.7887323943662, + "transfer_atk_acc": 64.7887323943662 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 46.45, + "origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 37.41, + "atk_prompt": "About each multiple-choice question about {}, find the rectify rebuttal by choices 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 66.5, + "transfer_atk_acc": 66.0 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 46.99, + "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 43.44, + "atk_prompt": "Explored your familiarity with {}, participate to the next multiple-choice issues by pickup 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 66.0, + "transfer_atk_acc": 65.5 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 87.91, + "origin_prompt": "From the context, provide the most precise answer. If the answer is not in the context, respond with 'unanswerable'.", + "atk_acc": 86.05, + "atk_prompt": "Against the context, give the most correct answer. If the solution is not in the context, respond with 'unanswerable'.", + "transfer_ori_acc": 70.28878149477491, + "transfer_atk_acc": 71.7844655492825 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 87.05, + "origin_prompt": "As a subject matter expert, extract the correct answer from the context. If an answer is not present, indicate 'unanswerable'.", + "atk_acc": 85.45, + "atk_prompt": "As a subject matter expert, extract the correct reacts from the context. If an answer is not present, indicate 'unanswerable'.", + "transfer_ori_acc": 73.17953545522198, + "transfer_atk_acc": 72.94818347386997 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.22, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.", + "atk_acc": 0.13, + "atk_prompt": "Conversion the later conviction from its indigenous {} vocabulary to the blanco tongues {}. ", + "transfer_ori_acc": 0.23990979374110985, + "transfer_atk_acc": 0.23906409239640158 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.22, + "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}.", + "atk_acc": 0.16, + "atk_prompt": "In the role of a languages specializing, exerting a machine converting for the handed pity, metamorphosis it from {} to {}. ", + "transfer_ori_acc": 0.24299367421289694, + "transfer_atk_acc": 0.23804368301402434 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.31, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.", + "atk_acc": 0.22, + "atk_prompt": "Convert the below sentence from its indigenous {} language to the blanco language {}. ", + "transfer_ori_acc": 0.3492457164800376, + "transfer_atk_acc": 0.3470429396594703 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.31, + "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}.", + "atk_acc": 0.26, + "atk_prompt": "Taking on the role of a attest translator, implement a interpreters of the gave sanction from {} to {}. ", + "transfer_ori_acc": 0.3492996664505888, + "transfer_atk_acc": 0.3561186993493318 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 17.5, + "origin_prompt": "Find the solution for this mathematical challenge with {}.", + "atk_acc": 15.0, + "atk_prompt": "Find the resolutions for this mathematical challenge with {}. ", + "transfer_ori_acc": 22.5, + "transfer_atk_acc": 23.125 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 18.12, + "origin_prompt": "Embodying a mathematics advisor, find the answer to the following math problem with {}.", + "atk_acc": 13.75, + "atk_prompt": "Embodying a mathematics advice, find the rebuttal to the following math problem with {}. ", + "transfer_ori_acc": 23.125, + "transfer_atk_acc": 13.125 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_chatgpt/translation_0_shot.json b/results_transfer/google-flan-t5-large_chatgpt/translation_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..369e4c12d25b85e79a9cc8ab839a89c67c984cd6 --- /dev/null +++ b/results_transfer/google-flan-t5-large_chatgpt/translation_0_shot.json @@ -0,0 +1,122 @@ +[ + { + "dataset": "cola", + "type": "translation", + "origin_acc": 78.0, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':", + "atk_acc": 74.6, + "atk_prompt": "As a tool for classifying valid sentences, determine whether 'acceptable' or 'unacceptable' is based on its grammatical rules.", + "transfer_ori_acc": 75.5, + "transfer_atk_acc": 75.0 + }, + { + "dataset": "mrpc", + "type": "translation", + "origin_acc": 82.6, + "origin_prompt": "As a semantic comparison expert, evaluate the given pair of sentences and determine if they are 'equivalent' or 'not_equivalent'.", + "atk_acc": 77.7, + "atk_prompt": "Determine whether a given pair of sentences is 'equivalent' or 'not_equivalent', and whether they are semantically identical.", + "transfer_ori_acc": 73.0, + "transfer_atk_acc": 75.0 + }, + { + "dataset": "qnli", + "type": "translation", + "origin_acc": 89.8, + "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 53.1, + "atk_prompt": "Answer with 'entailment' or 'not_entailment', inferring from the given context.", + "transfer_ori_acc": 78.5, + "transfer_atk_acc": 74.5 + }, + { + "dataset": "qqp", + "type": "translation", + "origin_acc": 88.1, + "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", + "atk_acc": 70.9, + "atk_prompt": "As a tool for determining the equivalence of problems, the problems are examined and their similarity is classified as 'equivalent' or 'not_equivalent'.", + "transfer_ori_acc": 81.0, + "transfer_atk_acc": 2.0 + }, + { + "dataset": "rte", + "type": "translation", + "origin_acc": 86.64, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 80.51, + "atk_prompt": "Determinate the semantic connections of a given sentence and classify it as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 81.0, + "transfer_atk_acc": 53.0 + }, + { + "dataset": "sst2", + "type": "translation", + "origin_acc": 95.41, + "origin_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement:", + "atk_acc": 92.78, + "atk_prompt": "Classify what you're trying to convey in this sentence as 'positive' if it's positive, and 'negative' if it's negative.", + "transfer_ori_acc": 92.5, + "transfer_atk_acc": 90.0 + }, + { + "dataset": "wnli", + "type": "translation", + "origin_acc": 66.2, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 49.3, + "atk_prompt": "Examine the paraphrases of a given sentence and classify them 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 66.19718309859155, + "transfer_atk_acc": 50.70422535211267 + }, + { + "dataset": "mmlu", + "type": "translation", + "origin_acc": 45.74, + "origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 41.84, + "atk_prompt": "Choose the correct answer for '{}' from 'A', 'B', 'C', or 'D' in the multiple choice problem involving,", + "transfer_ori_acc": 59.5, + "transfer_atk_acc": 56.00000000000001 + }, + { + "dataset": "squad_v2", + "type": "translation", + "origin_acc": 88.18, + "origin_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable'.", + "atk_acc": 84.51, + "atk_prompt": "Please give the cause of the incident mentioned in the context. If the cause is not clear, respond with 'unanswerable'.", + "transfer_ori_acc": 60.4580727188865, + "transfer_atk_acc": 66.50307424062444 + }, + { + "dataset": "iwslt", + "type": "translation", + "origin_acc": 0.14, + "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}:", + "atk_acc": 0.12, + "atk_prompt": "Please translate the sentences below into {}, then {}.", + "transfer_ori_acc": 0.21840008360286153, + "transfer_atk_acc": 0.11831480990391464 + }, + { + "dataset": "un_multi", + "type": "translation", + "origin_acc": 0.27, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}:", + "atk_acc": 0.26, + "atk_prompt": "Please translate the next sentence from {} to {}.", + "transfer_ori_acc": 0.3249349632796426, + "transfer_atk_acc": 0.3358246755929457 + }, + { + "dataset": "math", + "type": "translation", + "origin_acc": 16.25, + "origin_prompt": "Evaluate the following mathematical problem that includes {}:", + "atk_acc": 10.62, + "atk_prompt": "Compute the result of {}.", + "transfer_ori_acc": 4.375, + "transfer_atk_acc": 6.875000000000001 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_chatgpt/translation_3_shot.json b/results_transfer/google-flan-t5-large_chatgpt/translation_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..abf0275c410625b1cc59fa8acccd3490cfe54a6c --- /dev/null +++ b/results_transfer/google-flan-t5-large_chatgpt/translation_3_shot.json @@ -0,0 +1,122 @@ +[ + { + "dataset": "cola", + "type": "translation", + "origin_acc": 78.1, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable'.", + "atk_acc": 74.0, + "atk_prompt": "When evaluating the grammar of the sentence, classify it as 'acceptable' or 'unacceptable' according to the rules of the French language.", + "transfer_ori_acc": 80.0, + "transfer_atk_acc": 81.0 + }, + { + "dataset": "mrpc", + "type": "translation", + "origin_acc": 83.09, + "origin_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 79.66, + "atk_prompt": "Using the semantic comparison method, determine whether the following two statements are 'equivalent' or 'not_equivalent'.", + "transfer_ori_acc": 67.0, + "transfer_atk_acc": 69.5 + }, + { + "dataset": "qnli", + "type": "translation", + "origin_acc": 89.3, + "origin_prompt": "Based on the provided context and question, decide if the information supports the answer by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 54.8, + "atk_prompt": "Answer with 'entailment' or 'not_entailment', inferring from the given context.", + "transfer_ori_acc": 83.0, + "transfer_atk_acc": 81.0 + }, + { + "dataset": "qqp", + "type": "translation", + "origin_acc": 88.1, + "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", + "atk_acc": 77.9, + "atk_prompt": "As a tool for determining equivalence, check sentences and categorize them into 'equivalent' or 'not_equivalent' categories.", + "transfer_ori_acc": 82.5, + "transfer_atk_acc": 83.5 + }, + { + "dataset": "rte", + "type": "translation", + "origin_acc": 85.2, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 79.78, + "atk_prompt": "Rate the similarity of a given sentence and categorize it as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 82.5, + "transfer_atk_acc": 85.5 + }, + { + "dataset": "sst2", + "type": "translation", + "origin_acc": 95.53, + "origin_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement.", + "atk_acc": 93.23, + "atk_prompt": "As an emotion analysis tool, determine whether the emotion in the text is 'positive' or 'negative'.", + "transfer_ori_acc": 97.0, + "transfer_atk_acc": 94.5 + }, + { + "dataset": "wnli", + "type": "translation", + "origin_acc": 66.2, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 49.3, + "atk_prompt": "Examine the paraphrases of a given sentence and classify them 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 70.4225352112676, + "transfer_atk_acc": 64.7887323943662 + }, + { + "dataset": "mmlu", + "type": "translation", + "origin_acc": 46.99, + "origin_prompt": "As an expert in {}, respond to the following multiple-choice question by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 43.97, + "atk_prompt": "Choose the correct answer for '{}' from 'A', 'B', 'C', or 'D' in the multiple choice problem involving,", + "transfer_ori_acc": 65.5, + "transfer_atk_acc": 64.0 + }, + { + "dataset": "squad_v2", + "type": "translation", + "origin_acc": 87.91, + "origin_prompt": "From the context, provide the most precise answer. If the answer is not in the context, respond with 'unanswerable'.", + "atk_acc": 86.22, + "atk_prompt": "Briefly describes the situation and provides the corresponding response. If the answer cannot be found, answer with 'unanswerable'.", + "transfer_ori_acc": 69.83640054239395, + "transfer_atk_acc": 72.76139466416227 + }, + { + "dataset": "iwslt", + "type": "translation", + "origin_acc": 0.22, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.", + "atk_acc": 0.2, + "atk_prompt": "Please translate the sentences below into {}, then {}.", + "transfer_ori_acc": 0.23832533943033335, + "transfer_atk_acc": 0.15419195840841615 + }, + { + "dataset": "un_multi", + "type": "translation", + "origin_acc": 0.31, + "origin_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context.", + "atk_acc": 0.3, + "atk_prompt": "Please translate the given sentence into {} to {}.", + "transfer_ori_acc": 0.3463602362945987, + "transfer_atk_acc": 0.34766151675771106 + }, + { + "dataset": "math", + "type": "translation", + "origin_acc": 18.12, + "origin_prompt": "Embodying a mathematics advisor, find the answer to the following math problem with {}.", + "atk_acc": 15.62, + "atk_prompt": "According to {}, set the following value.", + "transfer_ori_acc": 23.125, + "transfer_atk_acc": 25.0 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_google-flan-ul2/bertattack_0_shot.json b/results_transfer/google-flan-t5-large_google-flan-ul2/bertattack_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..f78936729cafccd765b0bccecc7842d06e583679 --- /dev/null +++ b/results_transfer/google-flan-t5-large_google-flan-ul2/bertattack_0_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 77.9, + "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable':", + "atk_acc": 30.9, + "atk_prompt": "examine the english plural and say if it is grammaticalmost sound. Answer with neither 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 86.1, + "transfer_atk_acc": 77.60000000000001 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 76.3, + "origin_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 30.9, + "atk_prompt": "for an organ for linguistic evaluation, write the movie and question if its grammar is incorrect, and with 'acceptable' for new grammar or 'unacceptable' for unstable grammar:", + "transfer_ori_acc": 86.3, + "transfer_atk_acc": 83.6 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 82.11, + "origin_prompt": "Do these two sentences have the same underlying meaning? Respond with 'equivalent' or 'not_equivalent'.", + "atk_acc": 16.67, + "atk_prompt": "do these two clauses have the same underlying meanings? deal with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 86.02941176470588, + "transfer_atk_acc": 84.80392156862744 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 82.6, + "origin_prompt": "In your capacity as a semantic comparison tool, examine the given sentences and decide if their meanings are identical, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 9.07, + "atk_prompt": "using your capacity as a semantic relations member, determine the given words and decide if their meanings are consecutive, comparison with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.00980392156863, + "transfer_atk_acc": 82.35294117647058 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 88.5, + "origin_prompt": "Based on the provided context and question, decide if the information supports the answer by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 27.6, + "atk_prompt": "following on the supplied context and questioning, save if the information states the response by simply with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 94.8, + "transfer_atk_acc": 61.3 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 89.2, + "origin_prompt": "As a textual analyst, examine if the given context logically implies the answer to the question and indicate your decision with 'entailment' or 'not_entailment'.", + "atk_acc": 11.8, + "atk_prompt": "as a textual analyst, scrutiny if the following means logical) implies the reply to the answer and cross your r with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 93.5, + "transfer_atk_acc": 93.8 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.1, + "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", + "atk_acc": 80.2, + "atk_prompt": "to the next enough being equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\". ", + "transfer_ori_acc": 87.8, + "transfer_atk_acc": 87.3 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 87.7, + "origin_prompt": "Acting as a question equivalence instrument, determine if the provided questions are equivalent in meaning, answering with 'equivalent' for similar questions or 'not_equivalent' for dissimilar ones.", + "atk_acc": 72.2, + "atk_prompt": "t as a question equal instrument, determine if the program questions are equivalent in semantic, asking with 'equivalent' for analogous question or 'not_equivalent' for nonssimicomplex times. ", + "transfer_ori_acc": 87.9, + "transfer_atk_acc": 87.4 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 86.64, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 81.59, + "atk_prompt": "be the following deux verbs entailment or not_entailment? asking me with \"entailment\" or \"not_entailment\", just one word. ", + "transfer_ori_acc": 94.22382671480143, + "transfer_atk_acc": 92.05776173285199 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.84, + "origin_prompt": "While performing entailment analysis, classify the relationship between the provided sentences as 'entailment' or 'not_entailment':", + "atk_acc": 78.7, + "atk_prompt": "While performing entailment analysis, construct the relationship between the and sentence as 'entailment' or 'not_entailment':", + "transfer_ori_acc": 93.50180505415162, + "transfer_atk_acc": 92.4187725631769 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 94.95, + "origin_prompt": "Given the context of this text, indicate if the emotion conveyed is 'positive' or 'negative':", + "atk_acc": 92.89, + "atk_prompt": "give the context of this context, and if the emotion recorded is 'positive' or 'negative': ", + "transfer_ori_acc": 95.41284403669725, + "transfer_atk_acc": 94.4954128440367 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 95.41, + "origin_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement:", + "atk_acc": 89.79, + "atk_prompt": "how the role of a compliment analysis tool, responses with 'positive' or 'negative' to mood this statement: ", + "transfer_ori_acc": 96.44495412844036, + "transfer_atk_acc": 96.44495412844036 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 66.2, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 54.93, + "atk_prompt": "are the following analogous sentence entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one verb. ", + "transfer_ori_acc": 78.87323943661971, + "transfer_atk_acc": 73.23943661971832 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 64.79, + "origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment':", + "atk_acc": 56.34, + "atk_prompt": "h the capacity of an entailment assessment system, indicate if the link between the following verb is 'entailment' or 'not_entailment':", + "transfer_ori_acc": 78.87323943661971, + "transfer_atk_acc": 64.7887323943662 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 45.57, + "origin_prompt": "For the multiple-choice question related to {}, please choose the most accurate answer from 'A', 'B', 'C', or 'D'.", + "atk_acc": 16.84, + "atk_prompt": "For the multiple-choice choice similar to {}, please find the most inaccurate questions from 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 53.36879432624113, + "transfer_atk_acc": 13.829787234042554 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 44.86, + "origin_prompt": "Utilizing your expertise in {}, answer the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 3.19, + "atk_prompt": "Utilizing your expertise in {}, write the following multiple-choices question by each 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 52.12765957446809, + "transfer_atk_acc": 40.95744680851064 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 87.82, + "origin_prompt": "Search the context for the most relevant answer. If the answer cannot be found, respond with 'unanswerable'.", + "atk_acc": 36.51, + "atk_prompt": "return the topic for the most mentioned question. then the answer cannot be found, respond with 'unanswerable'.", + "transfer_ori_acc": 90.04896616541353, + "transfer_atk_acc": 57.76345029239766 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 88.05, + "origin_prompt": "As an authority on the context, locate the most accurate answer. If the context doesn't contain the answer, mention 'unanswerable'.", + "atk_acc": 42.92, + "atk_prompt": "in an authority on the context, discover the most incorrect explanation. ensure the person doesn't contains the answer, mention 'unanswerable'.", + "transfer_ori_acc": 88.98646616541353, + "transfer_atk_acc": 44.5 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.14, + "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}:", + "atk_acc": 0.13, + "atk_prompt": "Please perform a translation of the given citation, converting it from {} to {}:", + "transfer_ori_acc": 0.17287994792239936, + "transfer_atk_acc": 0.17666372994164403 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.14, + "origin_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to {}:", + "atk_acc": 0.11, + "atk_prompt": "i the capacity of a possessed translator, may modification the following sentence from {} to {}: ", + "transfer_ori_acc": 0.17223574608131062, + "transfer_atk_acc": 0.1709026262221616 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.27, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}:", + "atk_acc": 0.21, + "atk_prompt": "refer the following sentence from its original {} language to the targets i {}:", + "transfer_ori_acc": 0.31092395135800066, + "transfer_atk_acc": 0.3089286594500741 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.27, + "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}:", + "atk_acc": 0.17, + "atk_prompt": "while a reflex translator, reverse the given sentence from its original {} language to the target languages {}: ", + "transfer_ori_acc": 0.3122539424318205, + "transfer_atk_acc": 0.30521816181759504 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 14.37, + "origin_prompt": "Obtain the result for this math question regarding {}:", + "atk_acc": 2.5, + "atk_prompt": "Obtain the procedure for this math question regarding {}:", + "transfer_ori_acc": 13.750000000000002, + "transfer_atk_acc": 0.0 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 13.12, + "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}:", + "atk_acc": 1.88, + "atk_prompt": "As a mathematics instructor, calculate the sum to the following problem related to {}: ", + "transfer_ori_acc": 15.0, + "transfer_atk_acc": 8.75 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_google-flan-ul2/bertattack_3_shot.json b/results_transfer/google-flan-t5-large_google-flan-ul2/bertattack_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..e5acb85808d5dc81c37b8ad0375e4957d1463dc6 --- /dev/null +++ b/results_transfer/google-flan-t5-large_google-flan-ul2/bertattack_3_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 78.1, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable'.", + "atk_acc": 69.9, + "atk_prompt": "Assess the following conversation and pick if it is grammatical: correct. interact with 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 86.1, + "transfer_atk_acc": 87.0 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 75.8, + "origin_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.", + "atk_acc": 69.2, + "atk_prompt": "As an item for grammar acquisition, determine the conversation and determine if its grammar is okay, responding with 'acceptable' for normal grammar or 'unacceptable' for using spelling. ", + "transfer_ori_acc": 86.0, + "transfer_atk_acc": 86.2 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 82.6, + "origin_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent'.", + "atk_acc": 73.28, + "atk_prompt": "can the given words be considered semantically adjacent? Please respond with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.00980392156863, + "transfer_atk_acc": 85.04901960784314 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 81.37, + "origin_prompt": "As a semantic comparison expert, evaluate the given pair of sentences and determine if they are 'equivalent' or 'not_equivalent'.", + "atk_acc": 68.87, + "atk_prompt": "As a grammatical compares professionals, find the given language of sentence and determine if they are 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 85.04901960784314, + "transfer_atk_acc": 86.02941176470588 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 89.3, + "origin_prompt": "Based on the provided context and question, decide if the information supports the answer by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 45.5, + "atk_prompt": "from on the supplied context and questions, save if the information supports the answer by simply with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 94.5, + "transfer_atk_acc": 92.5 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 88.5, + "origin_prompt": "In your role as a semantic evaluator, determine if the provided context justifies the answer to the question and answer with 'entailment' or 'not_entailment'.", + "atk_acc": 40.6, + "atk_prompt": "when your role as a semantic evaluator, be if the reference subject justmatches the answer to the answer and also with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 93.7, + "transfer_atk_acc": 92.5 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.1, + "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", + "atk_acc": 82.5, + "atk_prompt": "Are the so a equations equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\". ", + "transfer_ori_acc": 88.5, + "transfer_atk_acc": 86.6 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 86.5, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 63.9, + "atk_prompt": "to an instrument for question ranking evaluation, consider the words and pick if their mood is the same, returning with 'equivalent' for analogous question or 'not_equivalent' for known questions. ", + "transfer_ori_acc": 89.0, + "transfer_atk_acc": 85.8 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 85.2, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 80.87, + "atk_prompt": "can the is compare sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word. ", + "transfer_ori_acc": 93.14079422382672, + "transfer_atk_acc": 93.50180505415162 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.12, + "origin_prompt": "Acting as an entailment detection instrument, determine if the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 78.7, + "atk_prompt": "Acting as an entailment detection instrument, be if the given language of sentences does entailment or not_entailment. start with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 93.50180505415162, + "transfer_atk_acc": 93.14079422382672 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 95.3, + "origin_prompt": "Considering the given phrase, would you say it carries a 'positive' or 'negative' connotation?", + "atk_acc": 91.51, + "atk_prompt": "given the verbal conjunction, who you say it implies a 'positive' or 'negative' connotation? ", + "transfer_ori_acc": 95.64220183486239, + "transfer_atk_acc": 95.75688073394495 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 95.41, + "origin_prompt": "Acting as a sentiment evaluator, identify if the given sentence is 'positive' or 'negative'.", + "atk_acc": 92.43, + "atk_prompt": "Acting as a positive evaluator, choose if the positive sentence is 'positive' or 'negative'. ", + "transfer_ori_acc": 96.55963302752293, + "transfer_atk_acc": 96.3302752293578 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 64.79, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 54.93, + "atk_prompt": "Assess the connecting between the relevant sentences and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 76.05633802816901, + "transfer_atk_acc": 77.46478873239437 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "Functioning as an entailment evaluation tool, analyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment'.", + "atk_acc": 49.3, + "atk_prompt": "Functioning as an entailment evaluation tool, analyze the generated sentences and place if their conversation is 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 76.05633802816901, + "transfer_atk_acc": 76.05633802816901 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 46.45, + "origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 16.84, + "atk_prompt": "For each multiple-. question about {}, reveal the incorrect ask by selecting 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 53.191489361702125, + "transfer_atk_acc": 13.47517730496454 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 46.99, + "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 43.44, + "atk_prompt": "Considering your familiarity with {}, attend to the following multiple-of question by options 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 54.43262411347518, + "transfer_atk_acc": 54.07801418439716 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 87.91, + "origin_prompt": "From the context, provide the most precise answer. If the answer is not in the context, respond with 'unanswerable'.", + "atk_acc": 73.79, + "atk_prompt": "about the context, provide the most vague explanation. given the answered is not in the sentence, respond with 'unanswerable'.", + "transfer_ori_acc": 89.38229949874687, + "transfer_atk_acc": 87.54479949874688 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 87.05, + "origin_prompt": "As a subject matter expert, extract the correct answer from the context. If an answer is not present, indicate 'unanswerable'.", + "atk_acc": 43.0, + "atk_prompt": "provide a question ask specialist, extract the incorrect field from the context. If an answer is not present, indicate 'unanswerable'.", + "transfer_ori_acc": 89.54896616541353, + "transfer_atk_acc": 44.5 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.22, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.", + "atk_acc": 0.21, + "atk_prompt": "replace the this verb from its originally {} language to the target language {}. ", + "transfer_ori_acc": 0.23981220411098694, + "transfer_atk_acc": 0.2595100240921701 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.22, + "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}.", + "atk_acc": 0.21, + "atk_prompt": "form a qualified translator, manipulate the given example from its original {} languages to the target language {}. ", + "transfer_ori_acc": 0.24717757056761575, + "transfer_atk_acc": 0.23887038016867845 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.31, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.", + "atk_acc": 0.3, + "atk_prompt": "translate the second sentence from its original {} language to the target language {}. ", + "transfer_ori_acc": 0.3527971649826467, + "transfer_atk_acc": 0.3539811965525201 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.31, + "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}.", + "atk_acc": 0.3, + "atk_prompt": "As a trained translator, convert the given case from its original {} language to the project languages {}. ", + "transfer_ori_acc": 0.35395992027011125, + "transfer_atk_acc": 0.3539972812158635 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 17.5, + "origin_prompt": "Find the solution for this mathematical challenge with {}.", + "atk_acc": 15.0, + "atk_prompt": "Find the conclusion for this mathematical challenge with {}. ", + "transfer_ori_acc": 15.625, + "transfer_atk_acc": 16.25 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 18.12, + "origin_prompt": "Embodying a mathematics advisor, find the answer to the following math problem with {}.", + "atk_acc": 9.38, + "atk_prompt": "Embodying a mathematics advisor, find the subject to the such math item with {}. ", + "transfer_ori_acc": 15.0, + "transfer_atk_acc": 13.125 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_google-flan-ul2/checklist_0_shot.json b/results_transfer/google-flan-t5-large_google-flan-ul2/checklist_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..2d9b84fdc1a9f2a6d75bc7ba094d46ffbb42c6ef --- /dev/null +++ b/results_transfer/google-flan-t5-large_google-flan-ul2/checklist_0_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 77.9, + "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable':", + "atk_acc": 62.6, + "atk_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable' IhuBIhoPGc:", + "transfer_ori_acc": 86.1, + "transfer_atk_acc": 84.0 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 76.3, + "origin_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 74.9, + "atk_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar IhuBIhoPGc:", + "transfer_ori_acc": 86.3, + "transfer_atk_acc": 83.89999999999999 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 82.11, + "origin_prompt": "Do these two sentences have the same underlying meaning? Respond with 'equivalent' or 'not_equivalent'.", + "atk_acc": 79.17, + "atk_prompt": "Do these two sentences have the same underlying meaning? Respond with 'equivalent' or 'not_equivalent' MSAddJ4D2a. ", + "transfer_ori_acc": 86.02941176470588, + "transfer_atk_acc": 86.27450980392157 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 82.6, + "origin_prompt": "As a semantic comparison expert, evaluate the given pair of sentences and determine if they are 'equivalent' or 'not_equivalent'.", + "atk_acc": 80.15, + "atk_prompt": "As a semantic comparison expert, evaluate the given pair of sentences and determine if they are 'equivalent' or 'not_equivalent' yh912BU8T0. ", + "transfer_ori_acc": 84.31372549019608, + "transfer_atk_acc": 84.80392156862744 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 88.5, + "origin_prompt": "Based on the provided context and question, decide if the information supports the answer by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 87.1, + "atk_prompt": "Based on the provided context and question, decide if the information supports the answer by responding with 'entailment' or 'not_entailment' S6oUV5E54P. ", + "transfer_ori_acc": 94.8, + "transfer_atk_acc": 94.69999999999999 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 89.4, + "origin_prompt": "As a semantic interpreter, assess whether the provided context supports the answer to the given question and answer with 'entailment' or 'not_entailment'.", + "atk_acc": 88.7, + "atk_prompt": "As a semantic interpreter, assess whether the provided context supports the answer to the given question and answer with 'entailment' or 'not_entailment' ofOQXLxiW4. ", + "transfer_ori_acc": 94.6, + "transfer_atk_acc": 94.5 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 87.7, + "origin_prompt": "Determine if the given pair of statements can be considered the same by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 86.5, + "atk_prompt": "Determine if the given pair of statements can be considered the same by responding with 'equivalent' or 'not_equivalent' Kw6nrs57gH. ", + "transfer_ori_acc": 88.1, + "transfer_atk_acc": 89.2 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 86.9, + "origin_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 86.6, + "atk_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent' Kw6nrs57gH. ", + "transfer_ori_acc": 88.0, + "transfer_atk_acc": 88.9 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 86.64, + "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 83.39, + "atk_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment' vTAjHynoIG.", + "transfer_ori_acc": 92.7797833935018, + "transfer_atk_acc": 92.4187725631769 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.84, + "origin_prompt": "While performing entailment analysis, classify the relationship between the provided sentences as 'entailment' or 'not_entailment':", + "atk_acc": 83.75, + "atk_prompt": "While performing entailment analysis, classify the relationship between the provided sentences as 'entailment' or 'not_entailment' CuVpbbkC4c:", + "transfer_ori_acc": 93.50180505415162, + "transfer_atk_acc": 92.4187725631769 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 94.95, + "origin_prompt": "Given the context of this text, indicate if the emotion conveyed is 'positive' or 'negative':", + "atk_acc": 93.58, + "atk_prompt": "Given the context of this text, indicate if the emotion conveyed is 'positive' or 'negative' LkF0FZxMZ4: ", + "transfer_ori_acc": 95.41284403669725, + "transfer_atk_acc": 94.4954128440367 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 95.3, + "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:", + "atk_acc": 94.15, + "atk_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify ItGDrrA1Sc: ", + "transfer_ori_acc": 95.9862385321101, + "transfer_atk_acc": 95.87155963302753 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 64.79, + "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 61.97, + "atk_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment' Kw6nrs57gH.", + "transfer_ori_acc": 78.87323943661971, + "transfer_atk_acc": 74.64788732394366 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment':", + "atk_acc": 61.97, + "atk_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment' SFPCqpiClT:", + "transfer_ori_acc": 80.28169014084507, + "transfer_atk_acc": 76.05633802816901 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 45.04, + "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.", + "atk_acc": 43.97, + "atk_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D' ItGDrrA1Sc. ", + "transfer_ori_acc": 53.36879432624113, + "transfer_atk_acc": 52.4822695035461 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 44.86, + "origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 43.44, + "atk_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D' CuVpbbkC4c.", + "transfer_ori_acc": 52.659574468085104, + "transfer_atk_acc": 53.01418439716312 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 87.65, + "origin_prompt": "Please derive the most fitting answer from the context. If there isn't an answer in the context, respond with 'unanswerable'.", + "atk_acc": 86.82, + "atk_prompt": "Please derive the most fitting answer from the context. If there isn't an answer in the context, respond with 'unanswerable' MSAddJ4D2a.", + "transfer_ori_acc": 89.88229949874687, + "transfer_atk_acc": 89.54896616541353 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 88.05, + "origin_prompt": "As an authority on the context, locate the most accurate answer. If the context doesn't contain the answer, mention 'unanswerable'.", + "atk_acc": 87.15, + "atk_prompt": "As an authority on the context, locate the most accurate answer. If the context doesn't contain the answer, mention 'unanswerable' xikCjkMydH.", + "transfer_ori_acc": 88.98646616541353, + "transfer_atk_acc": 87.58646616541355 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.14, + "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}:", + "atk_acc": 0.1, + "atk_prompt": "Please perform a translation of the given sentence, converting it from {} to mzjVogkRhM {}:", + "transfer_ori_acc": 0.17287994792239936, + "transfer_atk_acc": 0.15893556619118035 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.14, + "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}:", + "atk_acc": 0.08, + "atk_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to zexKUTWJYG {}: ", + "transfer_ori_acc": 0.1727120569025014, + "transfer_atk_acc": 0.1633255034209739 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.27, + "origin_prompt": "Accurately translate the sentence from {} to {}, ensuring the meaning remains intact:", + "atk_acc": 0.21, + "atk_prompt": "Accurately translate the sentence from {} to {}, ensuring the meaning remains intact EAB4KP2NVY:", + "transfer_ori_acc": 0.31735504988957897, + "transfer_atk_acc": 0.3098482661555996 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.27, + "origin_prompt": "Assuming the role of an expert language translator, translate the given text from {} to {}:", + "atk_acc": 0.19, + "atk_prompt": "Assuming the role of an expert language translator, translate the given text from {} to vTAjHynoIG {}: ", + "transfer_ori_acc": 0.31373650381115664, + "transfer_atk_acc": 0.3103473904869731 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 16.25, + "origin_prompt": "Evaluate the following mathematical problem that includes {}:", + "atk_acc": 10.62, + "atk_prompt": "Evaluate the following mathematical problem that includes is77sOXAu8 {}:", + "transfer_ori_acc": 13.750000000000002, + "transfer_atk_acc": 12.5 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 14.37, + "origin_prompt": "As a math guru, evaluate the following mathematical task regarding {}:", + "atk_acc": 10.0, + "atk_prompt": "As a math guru, evaluate the following mathematical task regarding KjPJJ2a7RB {}: ", + "transfer_ori_acc": 15.0, + "transfer_atk_acc": 13.125 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_google-flan-ul2/checklist_3_shot.json b/results_transfer/google-flan-t5-large_google-flan-ul2/checklist_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..d8b95294aede677c02864c8199d8f0833a3dbf60 --- /dev/null +++ b/results_transfer/google-flan-t5-large_google-flan-ul2/checklist_3_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 78.1, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable'.", + "atk_acc": 77.0, + "atk_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable' zexKUTWJYG. ", + "transfer_ori_acc": 86.1, + "transfer_atk_acc": 86.5 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 75.7, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.", + "atk_acc": 74.6, + "atk_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar zexKUTWJYG. ", + "transfer_ori_acc": 86.4, + "transfer_atk_acc": 86.5 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 82.84, + "origin_prompt": "Please analyze the provided sentences and indicate if their meanings are the same by choosing 'equivalent' or 'not_equivalent'.", + "atk_acc": 80.88, + "atk_prompt": "Please analyze the provided sentences and indicate if their meanings are the same by choosing 'equivalent' or 'not_equivalent' CuVpbbkC4c. ", + "transfer_ori_acc": 86.27450980392157, + "transfer_atk_acc": 86.02941176470588 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 80.15, + "origin_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 79.66, + "atk_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent' BWmsLx8xOA. ", + "transfer_ori_acc": 86.27450980392157, + "transfer_atk_acc": 86.02941176470588 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 87.8, + "origin_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 86.4, + "atk_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment' mOJ9nEwT4f. ", + "transfer_ori_acc": 93.8, + "transfer_atk_acc": 93.7 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 88.5, + "origin_prompt": "In your role as a semantic evaluator, determine if the provided context justifies the answer to the question and answer with 'entailment' or 'not_entailment'.", + "atk_acc": 87.7, + "atk_prompt": "In your role as a semantic evaluator, determine if the provided context justifies the answer to the question and answer with 'entailment' or 'not_entailment' MSAddJ4D2a. ", + "transfer_ori_acc": 93.7, + "transfer_atk_acc": 93.60000000000001 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 87.6, + "origin_prompt": "Are the meanings of these two phrases the same? Reply with 'equivalent' or 'not_equivalent'.", + "atk_acc": 87.4, + "atk_prompt": "Are the meanings of these two phrases the same? Reply with 'equivalent' or 'not_equivalent' Pbl3tYuoRo. ", + "transfer_ori_acc": 88.3, + "transfer_atk_acc": 88.6 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 86.5, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 86.0, + "atk_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions 6a4Yn3RGVc. ", + "transfer_ori_acc": 89.0, + "transfer_atk_acc": 89.3 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 84.84, + "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 83.39, + "atk_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment' 6a4Yn3RGVc. ", + "transfer_ori_acc": 93.14079422382672, + "transfer_atk_acc": 93.14079422382672 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.12, + "origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 82.31, + "atk_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment' MQnugHcaoy. ", + "transfer_ori_acc": 92.4187725631769, + "transfer_atk_acc": 92.7797833935018 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 94.95, + "origin_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative'.", + "atk_acc": 94.5, + "atk_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative' XStsD2pUzu. ", + "transfer_ori_acc": 96.78899082568807, + "transfer_atk_acc": 96.78899082568807 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 95.41, + "origin_prompt": "Acting as a sentiment evaluator, identify if the given sentence is 'positive' or 'negative'.", + "atk_acc": 94.84, + "atk_prompt": "Acting as a sentiment evaluator, identify if the given sentence is 'positive' or 'negative' SFPCqpiClT. ", + "transfer_ori_acc": 96.55963302752293, + "transfer_atk_acc": 96.55963302752293 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 64.79, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 63.38, + "atk_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment' Cev0s49fbe. ", + "transfer_ori_acc": 76.05633802816901, + "transfer_atk_acc": 77.46478873239437 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 66.2, + "atk_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment' 5XeflW1ZJc. ", + "transfer_ori_acc": 76.05633802816901, + "transfer_atk_acc": 76.05633802816901 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 45.92, + "origin_prompt": "Answer the subsequent multiple-choice question about {} by picking the right option among 'A', 'B', 'C', or 'D'.", + "atk_acc": 45.04, + "atk_prompt": "Answer the subsequent multiple-choice question about {} by picking the right option among 'A', 'B', 'C', or 'D' KjPJJ2a7RB. ", + "transfer_ori_acc": 53.72340425531915, + "transfer_atk_acc": 53.90070921985816 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 46.99, + "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 45.04, + "atk_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D' SFPCqpiClT.", + "transfer_ori_acc": 54.43262411347518, + "transfer_atk_acc": 53.36879432624113 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 87.91, + "origin_prompt": "From the context, provide the most precise answer. If the answer is not in the context, respond with 'unanswerable'.", + "atk_acc": 87.05, + "atk_prompt": "From the context, provide the most precise answer. If the answer is not in the context, respond with 'unanswerable' 5yWbBXztUY.", + "transfer_ori_acc": 89.38229949874687, + "transfer_atk_acc": 89.54896616541353 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 87.05, + "origin_prompt": "Drawing upon your expertise in the context, determine the most suitable answer. If an answer isn't available, state 'unanswerable'.", + "atk_acc": 86.55, + "atk_prompt": "Drawing upon your expertise in the context, determine the most suitable answer. If an answer isn't available, state 'unanswerable' 5XeflW1ZJc.", + "transfer_ori_acc": 89.54896616541353, + "transfer_atk_acc": 90.04896616541353 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.22, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.", + "atk_acc": 0.22, + "atk_prompt": "Convert the following sentence from its original {} language to the target language LkF0FZxMZ4 {}. ", + "transfer_ori_acc": 0.23981220411098694, + "transfer_atk_acc": 0.1929304765200739 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.22, + "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}.", + "atk_acc": 0.21, + "atk_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to MQnugHcaoy {}. ", + "transfer_ori_acc": 0.236844571841243, + "transfer_atk_acc": 0.22997780997943829 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.31, + "origin_prompt": "Translate the following text from the source language {} to the target language {}.", + "atk_acc": 0.3, + "atk_prompt": "Translate the following text from the source language {} to the target language Cev0s49fbe {}. ", + "transfer_ori_acc": 0.35343571054545636, + "transfer_atk_acc": 0.3528993868239228 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.31, + "origin_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to {}.", + "atk_acc": 0.29, + "atk_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to OB4KVJzIft {}. ", + "transfer_ori_acc": 0.3553613456359311, + "transfer_atk_acc": 0.35371471104543406 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 17.5, + "origin_prompt": "Resolve the following mathematical question associated with {}.", + "atk_acc": 15.62, + "atk_prompt": "Resolve the following mathematical question associated with 6a4Yn3RGVc {}. ", + "transfer_ori_acc": 16.25, + "transfer_atk_acc": 16.25 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 17.5, + "origin_prompt": "Imagine being a math mentor and determine the solution to the following problem with {}.", + "atk_acc": 15.62, + "atk_prompt": "Imagine being a math mentor and determine the solution to the following problem with b74X5IVXQY {}. ", + "transfer_ori_acc": 16.25, + "transfer_atk_acc": 15.625 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_google-flan-ul2/deepwordbug_0_shot.json b/results_transfer/google-flan-t5-large_google-flan-ul2/deepwordbug_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..aa38f838bb3d13cdb6bf9ecbd088d661bcee7711 --- /dev/null +++ b/results_transfer/google-flan-t5-large_google-flan-ul2/deepwordbug_0_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 77.9, + "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable':", + "atk_acc": 31.1, + "atk_prompt": "Exapine the gievn sentene and dVecide if it is grammktically sound. Answer with either 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 86.1, + "transfer_atk_acc": 85.6 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 76.3, + "origin_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 36.5, + "atk_prompt": "sA an instrument for grammar evaluatiJn, cnsider the setnence and deterbmine if its grmamar is crorect, reYponding with 'acceptable' for corect gramOar or 'unacceptable' for incorGect rgammar:", + "transfer_ori_acc": 86.3, + "transfer_atk_acc": 82.39999999999999 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 82.35, + "origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 76.96, + "atk_prompt": "Compar the folhowing sentences and deWermine if they share the same semantic emaning by respondig with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 86.02941176470588, + "transfer_atk_acc": 86.76470588235294 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 82.6, + "origin_prompt": "In your capacity as a semantic comparison tool, examine the given sentences and decide if their meanings are identical, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 62.25, + "atk_prompt": "In your capacity as a seantic coGparison tolo, exaine the given esentences and decide if their meanings are identiacl, resYonding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.00980392156863, + "transfer_atk_acc": 86.27450980392157 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 89.8, + "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 38.6, + "atk_prompt": "Evlauate whyther the giGven cotext supporzs the answeQ to the quePtion by rseponding with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 93.89999999999999, + "transfer_atk_acc": 94.19999999999999 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 89.4, + "origin_prompt": "As a semantic interpreter, assess whether the provided context supports the answer to the given question and answer with 'entailment' or 'not_entailment'.", + "atk_acc": 2.1, + "atk_prompt": "sA a esmantic interpreter, aBssess whehter the proviedd contexYt spuports the answher to the gEven questin and answOr with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 94.6, + "transfer_atk_acc": 94.39999999999999 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 87.8, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 59.8, + "atk_prompt": "Can these tow statYements be cnosidered qeual in maening? AnsweI with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.4, + "transfer_atk_acc": 86.6 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 86.9, + "origin_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 81.4, + "atk_prompt": "Functioning as a question smiilarity eavluation toXl, analyze the givYen questions and decidae if they share the same meajning, responding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 88.0, + "transfer_atk_acc": 88.5 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 86.64, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 81.95, + "atk_prompt": "Aer the following two Ysentences entailment or not_entailment? nswer me with \"entailment\" or \"not_entailment\", just one word. ", + "transfer_ori_acc": 94.22382671480143, + "transfer_atk_acc": 93.14079422382672 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.84, + "origin_prompt": "While performing entailment analysis, classify the relationship between the provided sentences as 'entailment' or 'not_entailment':", + "atk_acc": 82.67, + "atk_prompt": "While performing entailment analysis, cassify the reiationship between the proviLded sentences as 'entailment' or 'not_entailment':", + "transfer_ori_acc": 93.50180505415162, + "transfer_atk_acc": 94.22382671480143 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 94.84, + "origin_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative':", + "atk_acc": 94.04, + "atk_prompt": "Analze the tone of this sattement and respond with either 'positive' or 'negative': ", + "transfer_ori_acc": 96.3302752293578, + "transfer_atk_acc": 96.10091743119266 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 95.3, + "origin_prompt": "Serving as a sentiment evaluation model, determine if the given statement is 'positive' or 'negative'. Classify:", + "atk_acc": 64.91, + "atk_prompt": "Servign as a sentimBnt envaluation model, Qetermine if the Iiven statemen is 'positive' or 'negative'. Classhfy: ", + "transfer_ori_acc": 96.21559633027523, + "transfer_atk_acc": 96.3302752293578 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 66.2, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 60.56, + "atk_prompt": "Are the follHwing Ntwo sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one wor. ", + "transfer_ori_acc": 78.87323943661971, + "transfer_atk_acc": 77.46478873239437 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment':", + "atk_acc": 57.75, + "atk_prompt": "As an entailment identfiication zystem, examine the connection between the flolowing sentences and respond with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 80.28169014084507, + "transfer_atk_acc": 78.87323943661971 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 45.74, + "origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 42.2, + "atk_prompt": "For each multiple-chotce question about {}, dentify the caorrect answedr by selccting 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 51.77304964539007, + "transfer_atk_acc": 52.659574468085104 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 44.86, + "origin_prompt": "Utilizing your expertise in {}, answer the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 41.13, + "atk_prompt": "Utilizing your expertise in {}, ansKer the following multiprle-choice question by ipcking 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 52.12765957446809, + "transfer_atk_acc": 52.659574468085104 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 87.55, + "origin_prompt": "Find the correct answer in the context provided. If an answer cannot be found, please respond with 'unanswerable'.", + "atk_acc": 85.32, + "atk_prompt": "Find the Dcorrect answer in the context provided. GIf an answer cannot be found, please erspond with 'unanswerable'.", + "transfer_ori_acc": 89.98646616541353, + "transfer_atk_acc": 89.88229949874687 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 88.05, + "origin_prompt": "As an expert with a deep understanding of the context, find the best answer. If the context doesn't include an answer, say 'unanswerable'.", + "atk_acc": 86.92, + "atk_prompt": "As an expert with a deep understanding of the context, fHind the best answer. If the context doesn't include an answer, say 'unanswerable'.", + "transfer_ori_acc": 89.44479949874687, + "transfer_atk_acc": 89.38229949874687 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.14, + "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}:", + "atk_acc": 0.08, + "atk_prompt": "Perorm a nachine translatiou for the providde sentecne, changing it from {} to {}:", + "transfer_ori_acc": 0.1762435094015067, + "transfer_atk_acc": 0.17554033895297877 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.14, + "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}:", + "atk_acc": 0.07, + "atk_prompt": "TakIing on the roel of a certifieFd trLanslator, erform a translaton of the guiven Ventence from {} to {}: ", + "transfer_ori_acc": 0.1727120569025014, + "transfer_atk_acc": 0.16253885060575285 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.27, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}:", + "atk_acc": 0.12, + "atk_prompt": "ConvHert the folloiwng setnence from its origianl {} langudage to the trget languaoe {}:", + "transfer_ori_acc": 0.31092395135800066, + "transfer_atk_acc": 0.3062529126605774 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.27, + "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}:", + "atk_acc": 0.16, + "atk_prompt": "ks a qualifieA translator, covnert the given sentence from its original {} language to the target lanugage {}: ", + "transfer_ori_acc": 0.3122539424318205, + "transfer_atk_acc": 0.30671914217818197 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 16.25, + "origin_prompt": "Evaluate the following mathematical problem that includes {}:", + "atk_acc": 8.12, + "atk_prompt": "vEaluate the followinLg mtahematical problem that incldes {}:", + "transfer_ori_acc": 13.750000000000002, + "transfer_atk_acc": 13.750000000000002 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 13.12, + "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}:", + "atk_acc": 8.12, + "atk_prompt": "As a mathematics iestructor, calculate the answex to the following problem related to {}: ", + "transfer_ori_acc": 15.0, + "transfer_atk_acc": 15.0 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_google-flan-ul2/deepwordbug_3_shot.json b/results_transfer/google-flan-t5-large_google-flan-ul2/deepwordbug_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..451ffbfcabad28402dd098b245c25f30d1600e63 --- /dev/null +++ b/results_transfer/google-flan-t5-large_google-flan-ul2/deepwordbug_3_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 77.8, + "origin_prompt": "Is the provided sentence grammatically correct? Respond with 'Acceptable' or 'Unacceptable'.", + "atk_acc": 68.6, + "atk_prompt": "Is the provided semtence rammatically corrct? Respond with 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 86.6, + "transfer_atk_acc": 85.7 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 75.7, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.", + "atk_acc": 70.5, + "atk_prompt": "While hperforming grammNar anlaysis, classfiy the grammar of the foBlowing sentence as 'acceptable' for corrept grammgar or 'unacceptable' for incborrect grMammar. ", + "transfer_ori_acc": 86.4, + "transfer_atk_acc": 86.3 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 83.09, + "origin_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 78.92, + "atk_prompt": "Evaluate whether the two provided senetnces convey the same meaning by answering 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 85.7843137254902, + "transfer_atk_acc": 85.5392156862745 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 81.37, + "origin_prompt": "As a semantic comparison expert, evaluate the given pair of sentences and determine if they are 'equivalent' or 'not_equivalent'.", + "atk_acc": 76.72, + "atk_prompt": "As a semantic comparisKon experEt, evaluate the given pari of sentence and determine if they are 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 85.04901960784314, + "transfer_atk_acc": 84.80392156862744 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 89.3, + "origin_prompt": "Based on the provided context and question, decide if the information supports the answer by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 39.5, + "atk_prompt": "Baesd on the prohided conteMt and questino, dRcide if the infzrmation suppoLrts the aOnswer by resXonding with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 94.5, + "transfer_atk_acc": 92.2 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 88.8, + "origin_prompt": "In the capacity of a language evaluator, examine if the given context justifies the answer to the question and indicate your assessment with 'entailment' or 'not_entailment'.", + "atk_acc": 44.5, + "atk_prompt": "bn the cUapacity of a langtage valuator, Cxamine if the givcen contjxt jYustifies the anbwer to the qPuestion and idicate your assetssment with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 93.60000000000001, + "transfer_atk_acc": 93.2 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.1, + "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", + "atk_acc": 77.3, + "atk_prompt": "re the flolowing two questFons equivalent or not? Answe me with \"equivalent\" or \"not_equivalent\". ", + "transfer_ori_acc": 88.5, + "transfer_atk_acc": 88.8 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 86.5, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 81.8, + "atk_prompt": "As an instrument for question cotmparison evaluVtion, consider the questionPs and determbine if their maening is the same, responRing with 'equivalent' for similar questins or 'not_equivalent' for difserent questions. ", + "transfer_ori_acc": 89.0, + "transfer_atk_acc": 89.3 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 85.2, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 80.87, + "atk_prompt": "Aer the followig mwo sentences entailment or not_entailment? nswer me with \"entailment\" or \"not_entailment\", just noe word. ", + "transfer_ori_acc": 93.14079422382672, + "transfer_atk_acc": 93.14079422382672 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.12, + "origin_prompt": "Acting as an entailment detection instrument, determine if the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 81.59, + "atk_prompt": "Acting as an entailment detection instrument, dGetermine if the given pair of sentences demonstpates entailment or not_entailment. AVswer with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 93.50180505415162, + "transfer_atk_acc": 93.50180505415162 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 95.3, + "origin_prompt": "Considering the given phrase, would you say it carries a 'positive' or 'negative' connotation?", + "atk_acc": 93.92, + "atk_prompt": "Considering the given phraes, wouHd you Fay it carries a 'positive' or 'negative' connotavtion? ", + "transfer_ori_acc": 95.64220183486239, + "transfer_atk_acc": 95.64220183486239 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 95.41, + "origin_prompt": "Acting as a sentiment evaluator, identify if the given sentence is 'positive' or 'negative'.", + "atk_acc": 94.61, + "atk_prompt": "Acting as a seniment valuator, dientify if the giveIn sentence is 'positive' or 'negative'. ", + "transfer_ori_acc": 96.55963302752293, + "transfer_atk_acc": 96.78899082568807 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 64.79, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 54.93, + "atk_prompt": "Assses the connection between the followoing sentence and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 76.05633802816901, + "transfer_atk_acc": 77.46478873239437 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 56.34, + "atk_prompt": "In the capaciy of an entailment assessment system, inDdicate if the liQk between the followign sentences is 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 76.05633802816901, + "transfer_atk_acc": 76.05633802816901 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 46.81, + "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 43.44, + "atk_prompt": "In relation to the ultiple-choice question on {}, lease rovide the accuraet answer by choosieg 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 53.54609929078015, + "transfer_atk_acc": 53.54609929078015 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 46.99, + "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 43.44, + "atk_prompt": "Considering your familiarity with {}, attePd to the following multiple-hcoice question by pickiGng 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 54.43262411347518, + "transfer_atk_acc": 53.54609929078015 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 87.91, + "origin_prompt": "From the context, provide the most precise answer. If the answer is not in the context, respond with 'unanswerable'.", + "atk_acc": 86.55, + "atk_prompt": "Frmo the context, provide the most precise answer. If the answer is not in the context, respond with 'unanswerable'.", + "transfer_ori_acc": 89.38229949874687, + "transfer_atk_acc": 89.54896616541353 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 87.05, + "origin_prompt": "As a subject matter expert, extract the correct answer from the context. If an answer is not present, indicate 'unanswerable'.", + "atk_acc": 86.59, + "atk_prompt": "As a subject matter expert, extract the ocrrect anser from the contNext. If an answer is not present, indicate 'unanswerable'.", + "transfer_ori_acc": 89.54896616541353, + "transfer_atk_acc": 90.88229949874687 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.22, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.", + "atk_acc": 0.21, + "atk_prompt": "tonvert the following senvtence from its original {} language to the target language {}. ", + "transfer_ori_acc": 0.23981220411098694, + "transfer_atk_acc": 0.23553793529356767 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.22, + "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}.", + "atk_acc": 0.14, + "atk_prompt": "In the role of a langauge expert, perforF a machine kranslation for the pLovided senteSce, changing it from {} to {}. ", + "transfer_ori_acc": 0.236844571841243, + "transfer_atk_acc": 0.23819751870596426 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.31, + "origin_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context.", + "atk_acc": 0.3, + "atk_prompt": "Translate the proviWded entence from {} to {} while maintaining the oriinal meanin and conEtext. ", + "transfer_ori_acc": 0.35929731147922295, + "transfer_atk_acc": 0.354154230790244 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.31, + "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}.", + "atk_acc": 0.26, + "atk_prompt": "Taking on the roEle of a certifLed ltranslator, perofrm a trasnlation of the giDven senence from {} to {}. ", + "transfer_ori_acc": 0.35510202593913104, + "transfer_atk_acc": 0.3540073096648902 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 17.5, + "origin_prompt": "Figure out the solution for the following mathematical task with {}.", + "atk_acc": 15.62, + "atk_prompt": "Figure out the slution for the following mathemaitcal task with {}. ", + "transfer_ori_acc": 16.25, + "transfer_atk_acc": 15.0 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 17.5, + "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}.", + "atk_acc": 15.0, + "atk_prompt": "sA a mathematics instructor, calculate the anvwer to the following problem relDated to {}. ", + "transfer_ori_acc": 15.625, + "transfer_atk_acc": 15.0 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_google-flan-ul2/stresstest_0_shot.json b/results_transfer/google-flan-t5-large_google-flan-ul2/stresstest_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..cb73818e227d408bb83332f996be6d54bd0db6cf --- /dev/null +++ b/results_transfer/google-flan-t5-large_google-flan-ul2/stresstest_0_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 78.0, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':", + "atk_acc": 63.5, + "atk_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable' and false is not true :", + "transfer_ori_acc": 86.2, + "transfer_atk_acc": 82.19999999999999 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 76.3, + "origin_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 62.9, + "atk_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar and true is true and true is true and true is true and true is true and true is true :", + "transfer_ori_acc": 86.3, + "transfer_atk_acc": 60.9 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 82.11, + "origin_prompt": "Do the meanings of these two statements align? Indicate your answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 29.66, + "atk_prompt": "Do the meanings of these two statements align? Indicate your answer with 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 85.5392156862745, + "transfer_atk_acc": 85.29411764705883 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 81.62, + "origin_prompt": "As a language comparison expert, examine the given pair of sentences and decide if their meanings align, answering with 'equivalent' or 'not_equivalent'.", + "atk_acc": 15.93, + "atk_prompt": "As a language comparison expert, examine the given pair of sentences and decide if their meanings align, answering with 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 86.27450980392157, + "transfer_atk_acc": 86.27450980392157 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 88.7, + "origin_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 78.6, + "atk_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 94.3, + "transfer_atk_acc": 93.30000000000001 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 89.2, + "origin_prompt": "As a textual analyst, examine if the given context logically implies the answer to the question and indicate your decision with 'entailment' or 'not_entailment'.", + "atk_acc": 57.2, + "atk_prompt": "As a textual analyst, examine if the given context logically implies the answer to the question and indicate your decision with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 93.5, + "transfer_atk_acc": 93.7 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.1, + "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", + "atk_acc": 24.0, + "atk_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent and false is not true \". ", + "transfer_ori_acc": 87.8, + "transfer_atk_acc": 87.4 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 86.9, + "origin_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions.", + "atk_acc": 86.4, + "atk_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 88.1, + "transfer_atk_acc": 88.6 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 86.64, + "origin_prompt": "Analyze the two provided sentences and decide if their relationship is 'entailment' or 'not_entailment'.", + "atk_acc": 43.32, + "atk_prompt": "Analyze the two provided sentences and decide if their relationship is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true .", + "transfer_ori_acc": 93.14079422382672, + "transfer_atk_acc": 91.69675090252709 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.84, + "origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment':", + "atk_acc": 38.27, + "atk_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true :", + "transfer_ori_acc": 92.7797833935018, + "transfer_atk_acc": 91.69675090252709 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 94.84, + "origin_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative':", + "atk_acc": 92.43, + "atk_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative' and true is true and true is true and true is true and true is true and true is true : ", + "transfer_ori_acc": 96.3302752293578, + "transfer_atk_acc": 96.90366972477065 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 95.3, + "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:", + "atk_acc": 93.81, + "atk_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify and false is not true : ", + "transfer_ori_acc": 95.9862385321101, + "transfer_atk_acc": 91.39908256880734 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 66.2, + "origin_prompt": "Analyze the two provided sentences and decide if their relationship is 'entailment' or 'not_entailment'.", + "atk_acc": 26.76, + "atk_prompt": "Analyze the two provided sentences and decide if their relationship is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true .", + "transfer_ori_acc": 77.46478873239437, + "transfer_atk_acc": 74.64788732394366 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 64.79, + "origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment':", + "atk_acc": 25.35, + "atk_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true :", + "transfer_ori_acc": 78.87323943661971, + "transfer_atk_acc": 76.05633802816901 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 45.74, + "origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 44.33, + "atk_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D' and false is not true . ", + "transfer_ori_acc": 51.77304964539007, + "transfer_atk_acc": 51.77304964539007 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 44.86, + "origin_prompt": "Utilizing your expertise in {}, answer the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 43.97, + "atk_prompt": "Utilizing your expertise in {}, answer the following multiple-choice question by picking 'A', 'B', 'C', or 'D' and false is not true .", + "transfer_ori_acc": 52.12765957446809, + "transfer_atk_acc": 51.41843971631206 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 87.55, + "origin_prompt": "Find the correct answer in the context provided. If an answer cannot be found, please respond with 'unanswerable'.", + "atk_acc": 86.82, + "atk_prompt": "Find the correct answer in the context provided. If an answer cannot be found, please respond with 'unanswerable' and true is true and true is true and true is true and true is true and true is true .", + "transfer_ori_acc": 89.98646616541353, + "transfer_atk_acc": 90.98646616541353 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 88.18, + "origin_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable'.", + "atk_acc": 86.72, + "atk_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable' and true is true and true is true and true is true and true is true and true is true .", + "transfer_ori_acc": 89.88229949874687, + "transfer_atk_acc": 90.54896616541353 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.14, + "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}:", + "atk_acc": 0.05, + "atk_prompt": "Please perform a translation of the given sentence, converting it from {} to and true is true and true is true and true is true and true is true and true is true {}:", + "transfer_ori_acc": 0.17287994792239936, + "transfer_atk_acc": 0.13390236942313585 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.14, + "origin_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to {}:", + "atk_acc": 0.04, + "atk_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to and true is true and true is true and true is true and true is true and true is true {}: ", + "transfer_ori_acc": 0.17223574608131062, + "transfer_atk_acc": 0.14731943322573832 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.27, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}:", + "atk_acc": 0.18, + "atk_prompt": "Convert the following sentence from its original {} language to the target language and true is true and true is true and true is true and true is true and true is true {}:", + "transfer_ori_acc": 0.31092395135800066, + "transfer_atk_acc": 0.3054065605018617 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.27, + "origin_prompt": "Assuming the role of an expert language translator, translate the given text from {} to {}:", + "atk_acc": 0.16, + "atk_prompt": "Assuming the role of an expert language translator, translate the given text from {} to and false is not true {}: ", + "transfer_ori_acc": 0.31373650381115664, + "transfer_atk_acc": 0.30606374328334135 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 13.75, + "origin_prompt": "Work out the answer to this math problem featuring {}:", + "atk_acc": 11.88, + "atk_prompt": "Work out the answer to this math problem featuring and false is not true {}:", + "transfer_ori_acc": 12.5, + "transfer_atk_acc": 11.25 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 13.12, + "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}:", + "atk_acc": 11.25, + "atk_prompt": "As a mathematics instructor, calculate the answer to the following problem related to and false is not true {}: ", + "transfer_ori_acc": 15.0, + "transfer_atk_acc": 13.750000000000002 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_google-flan-ul2/stresstest_3_shot.json b/results_transfer/google-flan-t5-large_google-flan-ul2/stresstest_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..84974225bb612e97420cccbdd6c3c38d6aa7a6ba --- /dev/null +++ b/results_transfer/google-flan-t5-large_google-flan-ul2/stresstest_3_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 77.8, + "origin_prompt": "Is the provided sentence grammatically correct? Respond with 'Acceptable' or 'Unacceptable'.", + "atk_acc": 75.5, + "atk_prompt": "Is the provided sentence grammatically correct? Respond with 'Acceptable' or 'Unacceptable' and false is not true . ", + "transfer_ori_acc": 86.6, + "transfer_atk_acc": 86.3 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 75.8, + "origin_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.", + "atk_acc": 75.2, + "atk_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar and false is not true . ", + "transfer_ori_acc": 86.0, + "transfer_atk_acc": 86.2 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 83.09, + "origin_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 24.51, + "atk_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 85.7843137254902, + "transfer_atk_acc": 86.02941176470588 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 80.15, + "origin_prompt": "As a language comparison expert, examine the given pair of sentences and decide if their meanings align, answering with 'equivalent' or 'not_equivalent'.", + "atk_acc": 17.4, + "atk_prompt": "As a language comparison expert, examine the given pair of sentences and decide if their meanings align, answering with 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 86.51960784313727, + "transfer_atk_acc": 85.5392156862745 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 87.8, + "origin_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 79.4, + "atk_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 93.8, + "transfer_atk_acc": 94.39999999999999 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 88.8, + "origin_prompt": "In the capacity of a language evaluator, examine if the given context justifies the answer to the question and indicate your assessment with 'entailment' or 'not_entailment'.", + "atk_acc": 68.9, + "atk_prompt": "In the capacity of a language evaluator, examine if the given context justifies the answer to the question and indicate your assessment with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 93.60000000000001, + "transfer_atk_acc": 94.1 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.1, + "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", + "atk_acc": 23.4, + "atk_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent and false is not true \". ", + "transfer_ori_acc": 88.5, + "transfer_atk_acc": 88.4 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 86.5, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 86.4, + "atk_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions and true is true . ", + "transfer_ori_acc": 89.0, + "transfer_atk_acc": 88.6 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 84.84, + "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 43.68, + "atk_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 93.14079422382672, + "transfer_atk_acc": 93.14079422382672 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.12, + "origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 36.82, + "atk_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 92.4187725631769, + "transfer_atk_acc": 92.4187725631769 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 95.3, + "origin_prompt": "Considering the given phrase, would you say it carries a 'positive' or 'negative' connotation?", + "atk_acc": 94.61, + "atk_prompt": "Considering the given phrase, would you say it carries a 'positive' or 'negative' connotation and false is not true ? ", + "transfer_ori_acc": 95.64220183486239, + "transfer_atk_acc": 95.9862385321101 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 95.41, + "origin_prompt": "Acting as a sentiment evaluator, identify if the given sentence is 'positive' or 'negative'.", + "atk_acc": 95.07, + "atk_prompt": "Acting as a sentiment evaluator, identify if the given sentence is 'positive' or 'negative' and true is true . ", + "transfer_ori_acc": 96.55963302752293, + "transfer_atk_acc": 96.44495412844036 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 66.2, + "origin_prompt": "Analyze the two provided sentences and decide if their relationship is 'entailment' or 'not_entailment'.", + "atk_acc": 35.21, + "atk_prompt": "Analyze the two provided sentences and decide if their relationship is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 77.46478873239437, + "transfer_atk_acc": 76.05633802816901 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 23.94, + "atk_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 76.05633802816901, + "transfer_atk_acc": 73.23943661971832 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 45.92, + "origin_prompt": "Answer the subsequent multiple-choice question about {} by picking the right option among 'A', 'B', 'C', or 'D'.", + "atk_acc": 44.86, + "atk_prompt": "Answer the subsequent multiple-choice question about {} by picking the right option among 'A', 'B', 'C', or 'D' and false is not true . ", + "transfer_ori_acc": 53.72340425531915, + "transfer_atk_acc": 52.12765957446809 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 46.63, + "origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 44.86, + "atk_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D' and false is not true .", + "transfer_ori_acc": 53.01418439716312, + "transfer_atk_acc": 51.95035460992907 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 87.91, + "origin_prompt": "From the context, provide the most precise answer. If the answer is not in the context, respond with 'unanswerable'.", + "atk_acc": 87.05, + "atk_prompt": "From the context, provide the most precise answer. If the answer is not in the context, respond with 'unanswerable' and true is true and true is true and true is true and true is true and true is true .", + "transfer_ori_acc": 89.38229949874687, + "transfer_atk_acc": 89.88229949874687 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 87.32, + "origin_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable'.", + "atk_acc": 86.65, + "atk_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable' and true is true and true is true and true is true and true is true and true is true .", + "transfer_ori_acc": 90.54896616541353, + "transfer_atk_acc": 90.54896616541353 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.22, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.", + "atk_acc": 0.22, + "atk_prompt": "Convert the following sentence from its original {} language to the target language and true is true {}. ", + "transfer_ori_acc": 0.23981220411098694, + "transfer_atk_acc": 0.19706994450619086 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.22, + "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}.", + "atk_acc": 0.21, + "atk_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to and false is not true {}. ", + "transfer_ori_acc": 0.236844571841243, + "transfer_atk_acc": 0.23378656190205807 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.31, + "origin_prompt": "Translate the following text from the source language {} to the target language {}.", + "atk_acc": 0.3, + "atk_prompt": "Translate the following text from the source language {} to the target language and false is not true {}. ", + "transfer_ori_acc": 0.35343571054545636, + "transfer_atk_acc": 0.3526627578400292 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.31, + "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}.", + "atk_acc": 0.3, + "atk_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to and true is true and true is true and true is true and true is true and true is true {}. ", + "transfer_ori_acc": 0.35510202593913104, + "transfer_atk_acc": 0.3486065615860692 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 17.5, + "origin_prompt": "Find the solution for this mathematical challenge with {}.", + "atk_acc": 16.25, + "atk_prompt": "Find the solution for this mathematical challenge with and false is not true {}. ", + "transfer_ori_acc": 15.625, + "transfer_atk_acc": 13.750000000000002 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 18.12, + "origin_prompt": "Embodying a mathematics advisor, find the answer to the following math problem with {}.", + "atk_acc": 16.25, + "atk_prompt": "Embodying a mathematics advisor, find the answer to the following math problem with and false is not true {}. ", + "transfer_ori_acc": 15.0, + "transfer_atk_acc": 11.875 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_google-flan-ul2/textbugger_0_shot.json b/results_transfer/google-flan-t5-large_google-flan-ul2/textbugger_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..be38cbc18aa5a998365a3af3e9767dac5a2f5804 --- /dev/null +++ b/results_transfer/google-flan-t5-large_google-flan-ul2/textbugger_0_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 77.9, + "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable':", + "atk_acc": 32.1, + "atk_prompt": "Examining the given condemnation and decide if it is pronoun sounds. Answer with neither 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 86.1, + "transfer_atk_acc": 78.0 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 76.3, + "origin_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 46.8, + "atk_prompt": "Because an devices for grammar assessment, consdier the punishments and determining if its grammar is correct, responԁing with 'acceptable' for correct grammar or 'unacceptable' for incorrect pronunciation:", + "transfer_ori_acc": 86.3, + "transfer_atk_acc": 82.69999999999999 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 82.11, + "origin_prompt": "Do the meanings of these two statements align? Indicate your answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 75.25, + "atk_prompt": "Do the meanings of these two statemеnts alіgn? Denotes your answer with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 85.5392156862745, + "transfer_atk_acc": 85.7843137254902 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 81.62, + "origin_prompt": "As a language comparison expert, examine the given pair of sentences and decide if their meanings align, answering with 'equivalent' or 'not_equivalent'.", + "atk_acc": 74.26, + "atk_prompt": "As a language comparison e×pert, examine the given p air of sentences and decide if their meaings alin, answering with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 86.27450980392157, + "transfer_atk_acc": 87.00980392156863 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 89.8, + "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 56.8, + "atk_prompt": "Evɑluate lf the given context supports the replying to the question by answer with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 93.89999999999999, + "transfer_atk_acc": 93.4 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 89.4, + "origin_prompt": "As a semantic interpreter, assess whether the provided context supports the answer to the given question and answer with 'entailment' or 'not_entailment'.", + "atk_acc": 50.8, + "atk_prompt": "Since a semantic interpreter, as sess lf the provіded background supports the answer to the ɡiven question and answer with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 94.6, + "transfer_atk_acc": 93.0 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.1, + "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", + "atk_acc": 78.6, + "atk_prompt": "Are the following tԝo matters equivalent or not? Replying me with \"equivalent\" or \"not_equivalent\". ", + "transfer_ori_acc": 87.8, + "transfer_atk_acc": 88.1 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 87.7, + "origin_prompt": "Acting as a question equivalence instrument, determine if the provided questions are equivalent in meaning, answering with 'equivalent' for similar questions or 'not_equivalent' for dissimilar ones.", + "atk_acc": 82.5, + "atk_prompt": "Actinɡ as a question equivaleոce inst rument, determine if the prvided questions are equivalent in mean, answering with 'equivalent' for similar questions or 'not_equivalent' for dіssimilar ones. ", + "transfer_ori_acc": 87.9, + "transfer_atk_acc": 87.7 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 86.64, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 82.31, + "atk_prompt": "Arе the following to sentences entailment or not_entailment? Respond me with \"entailment\" or \"not_entailment\", just one word. ", + "transfer_ori_acc": 94.22382671480143, + "transfer_atk_acc": 93.86281588447653 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 85.56, + "origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 82.67, + "atk_prompt": "As an instrument for entailment evaluation, consider the t wo sentences and determine if their relationѕhip is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 93.50180505415162, + "transfer_atk_acc": 92.7797833935018 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 94.95, + "origin_prompt": "Given the context of this text, indicate if the emotion conveyed is 'positive' or 'negative':", + "atk_acc": 76.26, + "atk_prompt": "Giveո the background of this text, depicts if the emotions transmitted is 'positive' or 'negative': ", + "transfer_ori_acc": 95.41284403669725, + "transfer_atk_acc": 94.95412844036697 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 95.3, + "origin_prompt": "Serving as a sentiment evaluation model, determine if the given statement is 'positive' or 'negative'. Classify:", + "atk_acc": 88.53, + "atk_prompt": "Serving as a emotion evluation models, determining if the conferred state ment is 'positive' or 'negative'. Classify: ", + "transfer_ori_acc": 96.21559633027523, + "transfer_atk_acc": 94.95412844036697 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 66.2, + "origin_prompt": "Analyze the two provided sentences and decide if their relationship is 'entailment' or 'not_entailment'.", + "atk_acc": 57.75, + "atk_prompt": "Analyse the two provided sentences and dеcide if their relations is 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 77.46478873239437, + "transfer_atk_acc": 77.46478873239437 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment':", + "atk_acc": 57.75, + "atk_prompt": "As an entailment identification system, investigated the connection between the subsequently sentences and replying with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 80.28169014084507, + "transfer_atk_acc": 78.87323943661971 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 45.74, + "origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 43.26, + "atk_prompt": "For each multip le-choice question about {}, identify the rectify answer by selеcting 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 51.77304964539007, + "transfer_atk_acc": 53.01418439716312 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 44.86, + "origin_prompt": "Utilizing your expertise in {}, answer the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 40.96, + "atk_prompt": "Utilizіng your expertise in {}, answeⲅ the following multiple-choice question by collection 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 52.12765957446809, + "transfer_atk_acc": 53.72340425531915 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 87.55, + "origin_prompt": "Find the correct answer in the context provided. If an answer cannot be found, please respond with 'unanswerable'.", + "atk_acc": 85.82, + "atk_prompt": "Find the corrected replying in the ϲontext given. If an answer cannot be found, please respond with 'unanswerable'.", + "transfer_ori_acc": 89.98646616541353, + "transfer_atk_acc": 90.54896616541353 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 88.05, + "origin_prompt": "As an authority on the context, locate the most accurate answer. If the context doesn't contain the answer, mention 'unanswerable'.", + "atk_acc": 81.52, + "atk_prompt": "As an authoⲅity on the context, locate the most meticulous respond. Whether the context doesn't contain the answer, mentioո 'unanswerable'.", + "transfer_ori_acc": 88.98646616541353, + "transfer_atk_acc": 88.70729949874686 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.14, + "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}:", + "atk_acc": 0.13, + "atk_prompt": "Please perfоrm a trɑnslation of the given sentence, converting it from {} to {}:", + "transfer_ori_acc": 0.17287994792239936, + "transfer_atk_acc": 0.17384832122858687 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.14, + "origin_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to {}:", + "atk_acc": 0.12, + "atk_prompt": "In the capacities of a proficient traոslator, рlease change the following sentence from {} to {}: ", + "transfer_ori_acc": 0.17223574608131062, + "transfer_atk_acc": 0.16503387004479023 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.27, + "origin_prompt": "Accurately translate the sentence from {} to {}, ensuring the meaning remains intact:", + "atk_acc": 0.17, + "atk_prompt": "Scrupulously translate the sentence from {} to {}, ensuring the meani ng remains intact:", + "transfer_ori_acc": 0.31735504988957897, + "transfer_atk_acc": 0.3150453261764125 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.27, + "origin_prompt": "Assuming the role of an expert language translator, translate the given text from {} to {}:", + "atk_acc": 0.19, + "atk_prompt": "Asѕuming the role of an experts la nguage translator, translate the ɡiven text from {} to {}: ", + "transfer_ori_acc": 0.31373650381115664, + "transfer_atk_acc": 0.2448382580575487 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 16.25, + "origin_prompt": "Evaluate the following mathematical problem that includes {}:", + "atk_acc": 10.0, + "atk_prompt": "Evaluate the following mathematical problem that inϲludes {}:", + "transfer_ori_acc": 13.750000000000002, + "transfer_atk_acc": 14.374999999999998 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 13.12, + "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}:", + "atk_acc": 9.38, + "atk_prompt": "As a mathematics instructoⲅ, calculate the answers to the following problem related to {}: ", + "transfer_ori_acc": 15.0, + "transfer_atk_acc": 13.750000000000002 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_google-flan-ul2/textbugger_3_shot.json b/results_transfer/google-flan-t5-large_google-flan-ul2/textbugger_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..515be8451a5dafb735d4ee91ecb4c00fcfe9adc6 --- /dev/null +++ b/results_transfer/google-flan-t5-large_google-flan-ul2/textbugger_3_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 77.9, + "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable'.", + "atk_acc": 54.9, + "atk_prompt": "Examine the yielded sentence and decide if it is pronouns audio. Answer with neither 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 86.2, + "transfer_atk_acc": 85.0 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 76.4, + "origin_prompt": "Acting as a grammar detection instrument, determine if the provided sentence is grammatically sound, answering with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.", + "atk_acc": 70.0, + "atk_prompt": "Acting as a grammatical detection instruments, determining if the provided condemnation is grammatically sоund, answer with 'acceptable' for corⲅect grammatical or 'unacceptable' for incоrrect grɑmmar. ", + "transfer_ori_acc": 86.5, + "transfer_atk_acc": 86.4 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 82.6, + "origin_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent'.", + "atk_acc": 78.68, + "atk_prompt": "Can the given sntences be considered semantically idеntical? Please reply with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.00980392156863, + "transfer_atk_acc": 86.02941176470588 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 80.15, + "origin_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 75.49, + "atk_prompt": "In the role of a sentence compraison psychoanalyst, assess the provided sentences and indicatе if they convey the same meani ng by selecting 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 86.27450980392157, + "transfer_atk_acc": 87.00980392156863 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 88.8, + "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 62.1, + "atk_prompt": "Evluate both the giѵen context supports the answer to the question by responding with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 94.0, + "transfer_atk_acc": 93.4 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 88.8, + "origin_prompt": "In the capacity of a language evaluator, examine if the given context justifies the answer to the question and indicate your assessment with 'entailment' or 'not_entailment'.", + "atk_acc": 56.0, + "atk_prompt": "In the capacity of a language evaluator, examining if the giѵen backgrounds ϳustifies the answer to the matter and indicates your assessment with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 93.60000000000001, + "transfer_atk_acc": 92.80000000000001 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.1, + "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", + "atk_acc": 80.1, + "atk_prompt": "Are the following tԝo subjects equivalent or not? Anser me with \"equivalent\" or \"not_equivalent\". ", + "transfer_ori_acc": 88.5, + "transfer_atk_acc": 89.2 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 86.5, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 75.7, + "atk_prompt": "Since an instrument for question compares evaluation, consider the problems and identify if their connotation is the same, respondinɡ with 'equivalent' for similar question or 'not_equivalent' for assorted issues. ", + "transfer_ori_acc": 89.0, + "transfer_atk_acc": 89.60000000000001 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 85.2, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 81.59, + "atk_prompt": "Be the fоllowing t wo sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word. ", + "transfer_ori_acc": 93.14079422382672, + "transfer_atk_acc": 93.14079422382672 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.12, + "origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 81.59, + "atk_prompt": "For the capabilities of an entailment appraisals system, indicate if the link between the fo llowing sentences is 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 92.4187725631769, + "transfer_atk_acc": 92.7797833935018 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 94.95, + "origin_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative'.", + "atk_acc": 94.15, + "atk_prompt": "Analyze the tone of this declaration and cater with or 'positive' or 'negative'. ", + "transfer_ori_acc": 96.78899082568807, + "transfer_atk_acc": 96.78899082568807 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 95.41, + "origin_prompt": "Acting as a sentiment evaluator, identify if the given sentence is 'positive' or 'negative'.", + "atk_acc": 93.92, + "atk_prompt": "Acting as a ambience evaluatoⲅ, identifies if the awarded sentence is 'positive' or 'negative'. ", + "transfer_ori_acc": 96.55963302752293, + "transfer_atk_acc": 96.67431192660551 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 64.79, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 57.75, + "atk_prompt": "Assess the connecting between the subsequent sentence and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 76.05633802816901, + "transfer_atk_acc": 77.46478873239437 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "Functioning as an entailment evaluation tool, analyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment'.", + "atk_acc": 57.75, + "atk_prompt": "Functioning as an entailment evaluation tool, analyze the furnished sentences and decide if their relationshiр is 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 76.05633802816901, + "transfer_atk_acc": 77.46478873239437 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 46.81, + "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 43.62, + "atk_prompt": "In relation to the mlutiple-choice question on {}, pelase provide the meticulous answer by opt 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 53.54609929078015, + "transfer_atk_acc": 53.191489361702125 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 46.99, + "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 43.97, + "atk_prompt": "Considering your knowledge with {}, attend to the following multip le-choice questions by рicking 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 54.43262411347518, + "transfer_atk_acc": 53.90070921985816 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 87.15, + "origin_prompt": "Please extract the most appropriate answer from the context. If an answer is not present, indicate 'unanswerable'.", + "atk_acc": 86.55, + "atk_prompt": "Please extract the most appropriate answer from the context. Though an answer is not present, indicate 'unanswerable'.", + "transfer_ori_acc": 90.54896616541353, + "transfer_atk_acc": 91.04896616541353 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 87.05, + "origin_prompt": "As a subject matter expert, extract the correct answer from the context. If an answer is not present, indicate 'unanswerable'.", + "atk_acc": 85.95, + "atk_prompt": "As a subjected matter expeⲅt, extract the correct an swer from the context. If an answer is not present, indicate 'unanswerable'.", + "transfer_ori_acc": 89.54896616541353, + "transfer_atk_acc": 89.88229949874687 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.22, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.", + "atk_acc": 0.21, + "atk_prompt": "Convret the following sentence from its original {} language to the taⲅget language {}. ", + "transfer_ori_acc": 0.23981220411098694, + "transfer_atk_acc": 0.23048397949062127 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.22, + "origin_prompt": "As a qualified translator, convert the given sentence from its original {} language to the target language {}.", + "atk_acc": 0.21, + "atk_prompt": "Since a qualifid translator, coոvert the given sentence from its o riginal {} language to the target language {}. ", + "transfer_ori_acc": 0.24717757056761575, + "transfer_atk_acc": 0.24072043133873097 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.31, + "origin_prompt": "Translate the following text from the source language {} to the target language {}.", + "atk_acc": 0.29, + "atk_prompt": "Translate the subsequent text from the source tongue {} to the target lanugage {}. ", + "transfer_ori_acc": 0.35343571054545636, + "transfer_atk_acc": 0.35644412686512444 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.31, + "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}.", + "atk_acc": 0.28, + "atk_prompt": "Taking on the role of a certified translator, performed a translator of the giѵen sentencе from {} to {}. ", + "transfer_ori_acc": 0.35510202593913104, + "transfer_atk_acc": 0.35291126280843177 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 17.5, + "origin_prompt": "Resolve the following mathematical question associated with {}.", + "atk_acc": 15.62, + "atk_prompt": "Reslove the following math question associated with {}. ", + "transfer_ori_acc": 16.25, + "transfer_atk_acc": 15.625 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 17.5, + "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}.", + "atk_acc": 15.62, + "atk_prompt": "As a mathematics trainers, calculate the respond to the following problem related to {}. ", + "transfer_ori_acc": 15.625, + "transfer_atk_acc": 15.0 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_google-flan-ul2/textfooler_0_shot.json b/results_transfer/google-flan-t5-large_google-flan-ul2/textfooler_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..6d8c2aa08ac9072f5178d06d3c933b9984a2cab6 --- /dev/null +++ b/results_transfer/google-flan-t5-large_google-flan-ul2/textfooler_0_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 77.5, + "origin_prompt": "Determine if the grammar of the given sentence is 'Acceptable' or 'Unacceptable':", + "atk_acc": 31.0, + "atk_prompt": "Establishes if the jargon of the made punishments is 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 86.0, + "transfer_atk_acc": 41.099999999999994 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 76.3, + "origin_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 30.8, + "atk_prompt": "Since an apparatus for language measured, envisage the convicts and identify if its language is correct, reaction with 'acceptable' for accurate jargon or 'unacceptable' for unsound pronunciation:", + "transfer_ori_acc": 86.3, + "transfer_atk_acc": 80.2 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 82.11, + "origin_prompt": "Do these two sentences have the same underlying meaning? Respond with 'equivalent' or 'not_equivalent'.", + "atk_acc": 53.68, + "atk_prompt": "Ca these two condemning ont the same main meaning? Reactions with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 86.02941176470588, + "transfer_atk_acc": 83.57843137254902 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 81.62, + "origin_prompt": "As a language comparison expert, examine the given pair of sentences and decide if their meanings align, answering with 'equivalent' or 'not_equivalent'.", + "atk_acc": 12.01, + "atk_prompt": "As a language compared professional, reviews the given torque of sentences and picked if their sens unify, address with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 86.27450980392157, + "transfer_atk_acc": 86.51960784313727 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 89.8, + "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 41.4, + "atk_prompt": "Estimation whether the gave backgrounds perpetuates the answered to the trouble by responses with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 93.89999999999999, + "transfer_atk_acc": 93.89999999999999 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 89.4, + "origin_prompt": "As a semantic interpreter, assess whether the provided context supports the answer to the given question and answer with 'entailment' or 'not_entailment'.", + "atk_acc": 3.3, + "atk_prompt": "Iike a semantic performers, valuing whether the afforded ambient perpetuates the answer to the rendered item and cope with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 94.6, + "transfer_atk_acc": 85.5 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 87.7, + "origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 18.9, + "atk_prompt": "Valuing whether the later languages are comparable in sinn by cope 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.7, + "transfer_atk_acc": 87.7 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 86.9, + "origin_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions.", + "atk_acc": 73.8, + "atk_prompt": "About the rol of a question likeness analyzer, examine the mean of the conferred topic and identifying if they are equivalent, retaliated with 'equivalent' for equivalent trouble or 'not_equivalent' for other topics. ", + "transfer_ori_acc": 88.1, + "transfer_atk_acc": 89.1 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 86.64, + "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 80.87, + "atk_prompt": "Portrays if the login between the following punishments is 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 92.7797833935018, + "transfer_atk_acc": 92.4187725631769 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.84, + "origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment':", + "atk_acc": 79.06, + "atk_prompt": "About the capacity of an entailment weighed system, specified if the link between the following phrases is 'entailment' or 'not_entailment':", + "transfer_ori_acc": 92.7797833935018, + "transfer_atk_acc": 91.69675090252709 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 94.95, + "origin_prompt": "Given the context of this text, indicate if the emotion conveyed is 'positive' or 'negative':", + "atk_acc": 84.29, + "atk_prompt": "Made the background of this version, depicts if the passion recharged is 'positive' or 'negative': ", + "transfer_ori_acc": 95.41284403669725, + "transfer_atk_acc": 95.06880733944955 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 95.3, + "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:", + "atk_acc": 25.46, + "atk_prompt": "Functions as a emotion defining instrumentation, valuing if the next voicing is 'positive' or 'negative'. Classification: ", + "transfer_ori_acc": 95.9862385321101, + "transfer_atk_acc": 94.83944954128441 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 66.2, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 56.34, + "atk_prompt": "Be the following two phrases entailment or not_entailment? Responded me with \"entailment\" or \"not_entailment\", just one verb. ", + "transfer_ori_acc": 78.87323943661971, + "transfer_atk_acc": 74.64788732394366 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 64.79, + "origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment':", + "atk_acc": 56.34, + "atk_prompt": "Among the capacity of an entailment analysis system, indicate if the attach between the subsequently sentences is 'entailment' or 'not_entailment':", + "transfer_ori_acc": 78.87323943661971, + "transfer_atk_acc": 74.64788732394366 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 45.74, + "origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 37.06, + "atk_prompt": "By any multiple-choice question about {}, locate the corrected riposte by akhtar 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 51.77304964539007, + "transfer_atk_acc": 52.836879432624116 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 44.86, + "origin_prompt": "Utilizing your expertise in {}, answer the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 40.96, + "atk_prompt": "Utilise your expertise in {}, meet the next multiple-choice question by collection 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 52.12765957446809, + "transfer_atk_acc": 53.54609929078015 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 87.65, + "origin_prompt": "Please derive the most fitting answer from the context. If there isn't an answer in the context, respond with 'unanswerable'.", + "atk_acc": 84.09, + "atk_prompt": "Please subtract the most adjust answered from the context. Until there isn't an replied in the context, answered with 'unanswerable'.", + "transfer_ori_acc": 89.88229949874687, + "transfer_atk_acc": 89.98229949874687 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 88.05, + "origin_prompt": "As an authority on the context, locate the most accurate answer. If the context doesn't contain the answer, mention 'unanswerable'.", + "atk_acc": 77.31, + "atk_prompt": "As an authority on the context, find the most proper rebuttal. Though the context doesn't incorporated the answering, speaks 'unanswerable'.", + "transfer_ori_acc": 88.98646616541353, + "transfer_atk_acc": 55.74122807017544 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.14, + "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}:", + "atk_acc": 0.13, + "atk_prompt": "Please accomplishing a translators of the given judging, converting it from {} to {}:", + "transfer_ori_acc": 0.17287994792239936, + "transfer_atk_acc": 0.17242775573031324 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.14, + "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}:", + "atk_acc": 0.06, + "atk_prompt": "Under the features of a parlance specializing, realization a typewriter translator for the envisaged chastisement, altering it from {} to {}: ", + "transfer_ori_acc": 0.1757473985410512, + "transfer_atk_acc": 0.14220973997645037 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.27, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}:", + "atk_acc": 0.11, + "atk_prompt": "Transformations the later sanctions from its preliminary {} linguistics to the fated parlance {}:", + "transfer_ori_acc": 0.31092395135800066, + "transfer_atk_acc": 0.25185973744569756 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.27, + "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}:", + "atk_acc": 0.15, + "atk_prompt": "Under the role of a language skilled, performed a machine interpreters for the furnished condemned, transform it from {} to {}: ", + "transfer_ori_acc": 0.31256243540663714, + "transfer_atk_acc": 0.2918302355274189 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 13.75, + "origin_prompt": "Work out the answer to this math problem featuring {}:", + "atk_acc": 1.88, + "atk_prompt": "Work out the address to this math problem idiosyncratic {}:", + "transfer_ori_acc": 12.5, + "transfer_atk_acc": 4.375 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 13.12, + "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}:", + "atk_acc": 1.25, + "atk_prompt": "As a mathematics prof, calculate the address to the following problem related to {}: ", + "transfer_ori_acc": 15.0, + "transfer_atk_acc": 4.375 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_google-flan-ul2/textfooler_3_shot.json b/results_transfer/google-flan-t5-large_google-flan-ul2/textfooler_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..1d387f6de0d5bb2b2c458fcc1f43141c997a2473 --- /dev/null +++ b/results_transfer/google-flan-t5-large_google-flan-ul2/textfooler_3_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 77.8, + "origin_prompt": "Is the provided sentence grammatically correct? Respond with 'Acceptable' or 'Unacceptable'.", + "atk_acc": 70.3, + "atk_prompt": "Become the brought sentence grammatically correction? Reactions with 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 86.6, + "transfer_atk_acc": 85.8 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 75.7, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.", + "atk_acc": 70.5, + "atk_prompt": "If performing pronunciation analyzing, classify the grammar of the following condemnation as 'acceptable' for adequate jargon or 'unacceptable' for misspelled dictionary. ", + "transfer_ori_acc": 86.4, + "transfer_atk_acc": 86.6 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 83.09, + "origin_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 78.92, + "atk_prompt": "Estimation whether the two provided convictions despatch the same meaning by answers 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 85.7843137254902, + "transfer_atk_acc": 85.04901960784314 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 80.15, + "origin_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 75.25, + "atk_prompt": "In the role of a sentence likened psychoanalyst, assess the awarded sentences and indicate if they convey the same feeling by selecting 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 86.27450980392157, + "transfer_atk_acc": 86.51960784313727 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 88.8, + "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 36.0, + "atk_prompt": "Valuing whether the bestowed framework perpetuates the responded to the tura by retaliated with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 94.0, + "transfer_atk_acc": 82.3 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 88.8, + "origin_prompt": "In the capacity of a language evaluator, examine if the given context justifies the answer to the question and indicate your assessment with 'entailment' or 'not_entailment'.", + "atk_acc": 29.8, + "atk_prompt": "Of the jurisdictional of a phraseology evaluator, exams if the handed regard vindicated the solution to the matter and clarified your examinations with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 93.60000000000001, + "transfer_atk_acc": 81.39999999999999 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.1, + "origin_prompt": "textfooler", + "atk_acc": 70.9, + "atk_prompt": "Become the farther two subjects equivalent or not? Retaliated me with \"equivalent\" or \"not_equivalent\". ", + "transfer_ori_acc": 84.6, + "transfer_atk_acc": 88.7 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 86.5, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 63.4, + "atk_prompt": "As an tool for topic relation valuing, look the theme and identify if their connotation is the same, retaliated with 'equivalent' for similar question or 'not_equivalent' for other trouble. ", + "transfer_ori_acc": 89.0, + "transfer_atk_acc": 88.9 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 84.84, + "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 80.51, + "atk_prompt": "Defined whether the made peers of sentences showings entailment or not_entailment. Reaction with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 93.14079422382672, + "transfer_atk_acc": 92.7797833935018 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.12, + "origin_prompt": "Acting as an entailment detection instrument, determine if the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 79.78, + "atk_prompt": "Acting as an entailment detects instrument, defined if the given torque of sentences showcases entailment or not_entailment. Replies with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 93.50180505415162, + "transfer_atk_acc": 92.4187725631769 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 94.95, + "origin_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative'.", + "atk_acc": 94.27, + "atk_prompt": "Explore the tone of this affirmations and react with either 'positive' or 'negative'. ", + "transfer_ori_acc": 96.78899082568807, + "transfer_atk_acc": 96.78899082568807 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 95.53, + "origin_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement.", + "atk_acc": 94.04, + "atk_prompt": "In the role of a emotions exploring toolbox, respond with 'positive' or 'negative' to sorted this affirmations. ", + "transfer_ori_acc": 96.67431192660551, + "transfer_atk_acc": 96.55963302752293 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 64.79, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 54.93, + "atk_prompt": "Assess the connecting between the subsequent phrases and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 76.05633802816901, + "transfer_atk_acc": 76.05633802816901 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "Functioning as an entailment evaluation tool, analyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment'.", + "atk_acc": 56.34, + "atk_prompt": "Functioning as an entailment evaluation tool, analyze the afforded sentences and chosen if their liaise is 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 76.05633802816901, + "transfer_atk_acc": 76.05633802816901 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 46.45, + "origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 37.41, + "atk_prompt": "About each multiple-choice question about {}, find the rectify rebuttal by choices 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 53.191489361702125, + "transfer_atk_acc": 49.46808510638298 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 46.99, + "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 43.44, + "atk_prompt": "Explored your familiarity with {}, participate to the next multiple-choice issues by pickup 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 54.43262411347518, + "transfer_atk_acc": 53.54609929078015 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 87.91, + "origin_prompt": "From the context, provide the most precise answer. If the answer is not in the context, respond with 'unanswerable'.", + "atk_acc": 86.05, + "atk_prompt": "Against the context, give the most correct answer. If the solution is not in the context, respond with 'unanswerable'.", + "transfer_ori_acc": 89.38229949874687, + "transfer_atk_acc": 89.88229949874687 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 87.05, + "origin_prompt": "As a subject matter expert, extract the correct answer from the context. If an answer is not present, indicate 'unanswerable'.", + "atk_acc": 85.45, + "atk_prompt": "As a subject matter expert, extract the correct reacts from the context. If an answer is not present, indicate 'unanswerable'.", + "transfer_ori_acc": 89.54896616541353, + "transfer_atk_acc": 90.54896616541353 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.22, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.", + "atk_acc": 0.13, + "atk_prompt": "Conversion the later conviction from its indigenous {} vocabulary to the blanco tongues {}. ", + "transfer_ori_acc": 0.23981220411098694, + "transfer_atk_acc": 0.19057711840277197 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.22, + "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}.", + "atk_acc": 0.16, + "atk_prompt": "In the role of a languages specializing, exerting a machine converting for the handed pity, metamorphosis it from {} to {}. ", + "transfer_ori_acc": 0.236844571841243, + "transfer_atk_acc": 0.19421293922384184 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.31, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.", + "atk_acc": 0.22, + "atk_prompt": "Convert the below sentence from its indigenous {} language to the blanco language {}. ", + "transfer_ori_acc": 0.3527971649826467, + "transfer_atk_acc": 0.3519334378680938 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.31, + "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}.", + "atk_acc": 0.26, + "atk_prompt": "Taking on the role of a attest translator, implement a interpreters of the gave sanction from {} to {}. ", + "transfer_ori_acc": 0.35510202593913104, + "transfer_atk_acc": 0.3537216698989885 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 17.5, + "origin_prompt": "Find the solution for this mathematical challenge with {}.", + "atk_acc": 15.0, + "atk_prompt": "Find the resolutions for this mathematical challenge with {}. ", + "transfer_ori_acc": 15.625, + "transfer_atk_acc": 15.0 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 18.12, + "origin_prompt": "Embodying a mathematics advisor, find the answer to the following math problem with {}.", + "atk_acc": 13.75, + "atk_prompt": "Embodying a mathematics advice, find the rebuttal to the following math problem with {}. ", + "transfer_ori_acc": 15.0, + "transfer_atk_acc": 11.25 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_google-flan-ul2/translation_0_shot.json b/results_transfer/google-flan-t5-large_google-flan-ul2/translation_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..da94f1847d8b81331f5f1bda59468538968a32df --- /dev/null +++ b/results_transfer/google-flan-t5-large_google-flan-ul2/translation_0_shot.json @@ -0,0 +1,122 @@ +[ + { + "dataset": "cola", + "type": "translation", + "origin_acc": 78.0, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':", + "atk_acc": 74.6, + "atk_prompt": "As a tool for classifying valid sentences, determine whether 'acceptable' or 'unacceptable' is based on its grammatical rules.", + "transfer_ori_acc": 86.2, + "transfer_atk_acc": 85.8 + }, + { + "dataset": "mrpc", + "type": "translation", + "origin_acc": 82.6, + "origin_prompt": "As a semantic comparison expert, evaluate the given pair of sentences and determine if they are 'equivalent' or 'not_equivalent'.", + "atk_acc": 77.7, + "atk_prompt": "Determine whether a given pair of sentences is 'equivalent' or 'not_equivalent', and whether they are semantically identical.", + "transfer_ori_acc": 84.31372549019608, + "transfer_atk_acc": 85.29411764705883 + }, + { + "dataset": "qnli", + "type": "translation", + "origin_acc": 89.8, + "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 53.1, + "atk_prompt": "Answer with 'entailment' or 'not_entailment', inferring from the given context.", + "transfer_ori_acc": 93.89999999999999, + "transfer_atk_acc": 93.4 + }, + { + "dataset": "qqp", + "type": "translation", + "origin_acc": 88.1, + "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", + "atk_acc": 70.9, + "atk_prompt": "As a tool for determining the equivalence of problems, the problems are examined and their similarity is classified as 'equivalent' or 'not_equivalent'.", + "transfer_ori_acc": 87.8, + "transfer_atk_acc": 87.3 + }, + { + "dataset": "rte", + "type": "translation", + "origin_acc": 86.64, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 80.51, + "atk_prompt": "Determinate the semantic connections of a given sentence and classify it as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 94.22382671480143, + "transfer_atk_acc": 90.97472924187726 + }, + { + "dataset": "sst2", + "type": "translation", + "origin_acc": 95.41, + "origin_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement:", + "atk_acc": 92.78, + "atk_prompt": "Classify what you're trying to convey in this sentence as 'positive' if it's positive, and 'negative' if it's negative.", + "transfer_ori_acc": 96.44495412844036, + "transfer_atk_acc": 96.55963302752293 + }, + { + "dataset": "wnli", + "type": "translation", + "origin_acc": 66.2, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 49.3, + "atk_prompt": "Examine the paraphrases of a given sentence and classify them 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 78.87323943661971, + "transfer_atk_acc": 78.87323943661971 + }, + { + "dataset": "mmlu", + "type": "translation", + "origin_acc": 45.74, + "origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 41.84, + "atk_prompt": "Choose the correct answer for '{}' from 'A', 'B', 'C', or 'D' in the multiple choice problem involving,", + "transfer_ori_acc": 51.77304964539007, + "transfer_atk_acc": 53.90070921985816 + }, + { + "dataset": "squad_v2", + "type": "translation", + "origin_acc": 88.18, + "origin_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable'.", + "atk_acc": 84.51, + "atk_prompt": "Please give the cause of the incident mentioned in the context. If the cause is not clear, respond with 'unanswerable'.", + "transfer_ori_acc": 89.88229949874687, + "transfer_atk_acc": 88.87093586238325 + }, + { + "dataset": "iwslt", + "type": "translation", + "origin_acc": 0.14, + "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}:", + "atk_acc": 0.12, + "atk_prompt": "Please translate the sentences below into {}, then {}.", + "transfer_ori_acc": 0.17287994792239936, + "transfer_atk_acc": 0.1731981736461422 + }, + { + "dataset": "un_multi", + "type": "translation", + "origin_acc": 0.27, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}:", + "atk_acc": 0.26, + "atk_prompt": "Please translate the next sentence from {} to {}.", + "transfer_ori_acc": 0.31092395135800066, + "transfer_atk_acc": 0.3173709801911563 + }, + { + "dataset": "math", + "type": "translation", + "origin_acc": 16.25, + "origin_prompt": "Evaluate the following mathematical problem that includes {}:", + "atk_acc": 10.62, + "atk_prompt": "Compute the result of {}.", + "transfer_ori_acc": 13.750000000000002, + "transfer_atk_acc": 12.5 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_google-flan-ul2/translation_3_shot.json b/results_transfer/google-flan-t5-large_google-flan-ul2/translation_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..0877b4e5f652016350518ed29b07fa530a8cb0cc --- /dev/null +++ b/results_transfer/google-flan-t5-large_google-flan-ul2/translation_3_shot.json @@ -0,0 +1,122 @@ +[ + { + "dataset": "cola", + "type": "translation", + "origin_acc": 78.1, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable'.", + "atk_acc": 74.0, + "atk_prompt": "When evaluating the grammar of the sentence, classify it as 'acceptable' or 'unacceptable' according to the rules of the French language.", + "transfer_ori_acc": 86.1, + "transfer_atk_acc": 86.4 + }, + { + "dataset": "mrpc", + "type": "translation", + "origin_acc": 83.09, + "origin_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 79.66, + "atk_prompt": "Using the semantic comparison method, determine whether the following two statements are 'equivalent' or 'not_equivalent'.", + "transfer_ori_acc": 85.7843137254902, + "transfer_atk_acc": 85.29411764705883 + }, + { + "dataset": "qnli", + "type": "translation", + "origin_acc": 89.3, + "origin_prompt": "Based on the provided context and question, decide if the information supports the answer by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 54.8, + "atk_prompt": "Answer with 'entailment' or 'not_entailment', inferring from the given context.", + "transfer_ori_acc": 94.5, + "transfer_atk_acc": 93.0 + }, + { + "dataset": "qqp", + "type": "translation", + "origin_acc": 88.1, + "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", + "atk_acc": 77.9, + "atk_prompt": "As a tool for determining equivalence, check sentences and categorize them into 'equivalent' or 'not_equivalent' categories.", + "transfer_ori_acc": 88.5, + "transfer_atk_acc": 89.0 + }, + { + "dataset": "rte", + "type": "translation", + "origin_acc": 85.2, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 79.78, + "atk_prompt": "Rate the similarity of a given sentence and categorize it as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 93.14079422382672, + "transfer_atk_acc": 91.69675090252709 + }, + { + "dataset": "sst2", + "type": "translation", + "origin_acc": 95.53, + "origin_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement.", + "atk_acc": 93.23, + "atk_prompt": "As an emotion analysis tool, determine whether the emotion in the text is 'positive' or 'negative'.", + "transfer_ori_acc": 96.67431192660551, + "transfer_atk_acc": 94.4954128440367 + }, + { + "dataset": "wnli", + "type": "translation", + "origin_acc": 66.2, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 49.3, + "atk_prompt": "Examine the paraphrases of a given sentence and classify them 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 76.05633802816901, + "transfer_atk_acc": 80.28169014084507 + }, + { + "dataset": "mmlu", + "type": "translation", + "origin_acc": 46.99, + "origin_prompt": "As an expert in {}, respond to the following multiple-choice question by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 43.97, + "atk_prompt": "Choose the correct answer for '{}' from 'A', 'B', 'C', or 'D' in the multiple choice problem involving,", + "transfer_ori_acc": 53.72340425531915, + "transfer_atk_acc": 53.36879432624113 + }, + { + "dataset": "squad_v2", + "type": "translation", + "origin_acc": 87.91, + "origin_prompt": "From the context, provide the most precise answer. If the answer is not in the context, respond with 'unanswerable'.", + "atk_acc": 86.22, + "atk_prompt": "Briefly describes the situation and provides the corresponding response. If the answer cannot be found, answer with 'unanswerable'.", + "transfer_ori_acc": 89.38229949874687, + "transfer_atk_acc": 90.88229949874687 + }, + { + "dataset": "iwslt", + "type": "translation", + "origin_acc": 0.22, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.", + "atk_acc": 0.2, + "atk_prompt": "Please translate the sentences below into {}, then {}.", + "transfer_ori_acc": 0.23981220411098694, + "transfer_atk_acc": 0.23367230863817542 + }, + { + "dataset": "un_multi", + "type": "translation", + "origin_acc": 0.31, + "origin_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context.", + "atk_acc": 0.3, + "atk_prompt": "Please translate the given sentence into {} to {}.", + "transfer_ori_acc": 0.35929731147922295, + "transfer_atk_acc": 0.35719628133790055 + }, + { + "dataset": "math", + "type": "translation", + "origin_acc": 18.12, + "origin_prompt": "Embodying a mathematics advisor, find the answer to the following math problem with {}.", + "atk_acc": 15.62, + "atk_prompt": "According to {}, set the following value.", + "transfer_ori_acc": 15.0, + "transfer_atk_acc": 16.25 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_vicuna-13b/bertattack_0_shot.json b/results_transfer/google-flan-t5-large_vicuna-13b/bertattack_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..df9d71a9b96fc5d531006dd7e216c4d68c806390 --- /dev/null +++ b/results_transfer/google-flan-t5-large_vicuna-13b/bertattack_0_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 77.9, + "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable':", + "atk_acc": 30.9, + "atk_prompt": "examine the english plural and say if it is grammaticalmost sound. Answer with neither 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 50.0, + "transfer_atk_acc": 41.099999999999994 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 76.3, + "origin_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 30.9, + "atk_prompt": "for an organ for linguistic evaluation, write the movie and question if its grammar is incorrect, and with 'acceptable' for new grammar or 'unacceptable' for unstable grammar:", + "transfer_ori_acc": 36.199999999999996, + "transfer_atk_acc": 44.4 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 82.11, + "origin_prompt": "Do these two sentences have the same underlying meaning? Respond with 'equivalent' or 'not_equivalent'.", + "atk_acc": 16.67, + "atk_prompt": "do these two clauses have the same underlying meanings? deal with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 54.90196078431373, + "transfer_atk_acc": 18.38235294117647 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 82.6, + "origin_prompt": "In your capacity as a semantic comparison tool, examine the given sentences and decide if their meanings are identical, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 9.07, + "atk_prompt": "using your capacity as a semantic relations member, determine the given words and decide if their meanings are consecutive, comparison with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 64.2156862745098, + "transfer_atk_acc": 51.71568627450981 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 88.5, + "origin_prompt": "Based on the provided context and question, decide if the information supports the answer by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 27.6, + "atk_prompt": "following on the supplied context and questioning, save if the information states the response by simply with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 38.4, + "transfer_atk_acc": 9.6 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 89.2, + "origin_prompt": "As a textual analyst, examine if the given context logically implies the answer to the question and indicate your decision with 'entailment' or 'not_entailment'.", + "atk_acc": 11.8, + "atk_prompt": "as a textual analyst, scrutiny if the following means logical) implies the reply to the answer and cross your r with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 13.8, + "transfer_atk_acc": 6.9 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.1, + "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", + "atk_acc": 80.2, + "atk_prompt": "to the next enough being equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\". ", + "transfer_ori_acc": 23.9, + "transfer_atk_acc": 19.3 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 87.7, + "origin_prompt": "Acting as a question equivalence instrument, determine if the provided questions are equivalent in meaning, answering with 'equivalent' for similar questions or 'not_equivalent' for dissimilar ones.", + "atk_acc": 72.2, + "atk_prompt": "t as a question equal instrument, determine if the program questions are equivalent in semantic, asking with 'equivalent' for analogous question or 'not_equivalent' for nonssimicomplex times. ", + "transfer_ori_acc": 25.0, + "transfer_atk_acc": 17.9 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 86.64, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 81.59, + "atk_prompt": "be the following deux verbs entailment or not_entailment? asking me with \"entailment\" or \"not_entailment\", just one word. ", + "transfer_ori_acc": 27.075812274368232, + "transfer_atk_acc": 34.29602888086642 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.84, + "origin_prompt": "While performing entailment analysis, classify the relationship between the provided sentences as 'entailment' or 'not_entailment':", + "atk_acc": 78.7, + "atk_prompt": "While performing entailment analysis, construct the relationship between the and sentence as 'entailment' or 'not_entailment':", + "transfer_ori_acc": 23.104693140794225, + "transfer_atk_acc": 15.162454873646208 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 94.95, + "origin_prompt": "Given the context of this text, indicate if the emotion conveyed is 'positive' or 'negative':", + "atk_acc": 92.89, + "atk_prompt": "give the context of this context, and if the emotion recorded is 'positive' or 'negative': ", + "transfer_ori_acc": 29.472477064220183, + "transfer_atk_acc": 22.24770642201835 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 95.41, + "origin_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement:", + "atk_acc": 89.79, + "atk_prompt": "how the role of a compliment analysis tool, responses with 'positive' or 'negative' to mood this statement: ", + "transfer_ori_acc": 32.798165137614674, + "transfer_atk_acc": 39.10550458715596 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 66.2, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 54.93, + "atk_prompt": "are the following analogous sentence entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one verb. ", + "transfer_ori_acc": 25.352112676056336, + "transfer_atk_acc": 35.2112676056338 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 64.79, + "origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment':", + "atk_acc": 56.34, + "atk_prompt": "h the capacity of an entailment assessment system, indicate if the link between the following verb is 'entailment' or 'not_entailment':", + "transfer_ori_acc": 43.66197183098591, + "transfer_atk_acc": 12.676056338028168 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 45.57, + "origin_prompt": "For the multiple-choice question related to {}, please choose the most accurate answer from 'A', 'B', 'C', or 'D'.", + "atk_acc": 16.84, + "atk_prompt": "For the multiple-choice choice similar to {}, please find the most inaccurate questions from 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 18.79432624113475, + "transfer_atk_acc": 22.5177304964539 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 44.86, + "origin_prompt": "Utilizing your expertise in {}, answer the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 3.19, + "atk_prompt": "Utilizing your expertise in {}, write the following multiple-choices question by each 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 20.74468085106383, + "transfer_atk_acc": 30.141843971631204 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_vicuna-13b/bertattack_3_shot.json b/results_transfer/google-flan-t5-large_vicuna-13b/bertattack_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..0ba336a803ade82723226a46060479531fed579e --- /dev/null +++ b/results_transfer/google-flan-t5-large_vicuna-13b/bertattack_3_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 78.1, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable'.", + "atk_acc": 69.9, + "atk_prompt": "Assess the following conversation and pick if it is grammatical: correct. interact with 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 0.8999999999999999, + "transfer_atk_acc": 28.799999999999997 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 75.8, + "origin_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.", + "atk_acc": 69.2, + "atk_prompt": "As an item for grammar acquisition, determine the conversation and determine if its grammar is okay, responding with 'acceptable' for normal grammar or 'unacceptable' for using spelling. ", + "transfer_ori_acc": 15.299999999999999, + "transfer_atk_acc": 19.5 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 82.6, + "origin_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent'.", + "atk_acc": 73.28, + "atk_prompt": "can the given words be considered semantically adjacent? Please respond with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 65.44117647058823, + "transfer_atk_acc": 70.34313725490196 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 81.37, + "origin_prompt": "As a semantic comparison expert, evaluate the given pair of sentences and determine if they are 'equivalent' or 'not_equivalent'.", + "atk_acc": 68.87, + "atk_prompt": "As a grammatical compares professionals, find the given language of sentence and determine if they are 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 67.8921568627451, + "transfer_atk_acc": 68.13725490196079 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 89.3, + "origin_prompt": "Based on the provided context and question, decide if the information supports the answer by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 45.5, + "atk_prompt": "from on the supplied context and questions, save if the information supports the answer by simply with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 32.4, + "transfer_atk_acc": 24.7 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 88.5, + "origin_prompt": "In your role as a semantic evaluator, determine if the provided context justifies the answer to the question and answer with 'entailment' or 'not_entailment'.", + "atk_acc": 40.6, + "atk_prompt": "when your role as a semantic evaluator, be if the reference subject justmatches the answer to the answer and also with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 24.6, + "transfer_atk_acc": 10.5 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.1, + "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", + "atk_acc": 82.5, + "atk_prompt": "Are the so a equations equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\". ", + "transfer_ori_acc": 34.8, + "transfer_atk_acc": 36.6 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 86.5, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 63.9, + "atk_prompt": "to an instrument for question ranking evaluation, consider the words and pick if their mood is the same, returning with 'equivalent' for analogous question or 'not_equivalent' for known questions. ", + "transfer_ori_acc": 35.3, + "transfer_atk_acc": 29.9 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 85.2, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 80.87, + "atk_prompt": "can the is compare sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word. ", + "transfer_ori_acc": 49.81949458483754, + "transfer_atk_acc": 51.26353790613718 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.12, + "origin_prompt": "Acting as an entailment detection instrument, determine if the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 78.7, + "atk_prompt": "Acting as an entailment detection instrument, be if the given language of sentences does entailment or not_entailment. start with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 4.332129963898916, + "transfer_atk_acc": 40.43321299638989 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 95.3, + "origin_prompt": "Considering the given phrase, would you say it carries a 'positive' or 'negative' connotation?", + "atk_acc": 91.51, + "atk_prompt": "given the verbal conjunction, who you say it implies a 'positive' or 'negative' connotation? ", + "transfer_ori_acc": 1.7201834862385321, + "transfer_atk_acc": 14.3348623853211 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 95.41, + "origin_prompt": "Acting as a sentiment evaluator, identify if the given sentence is 'positive' or 'negative'.", + "atk_acc": 92.43, + "atk_prompt": "Acting as a positive evaluator, choose if the positive sentence is 'positive' or 'negative'. ", + "transfer_ori_acc": 10.77981651376147, + "transfer_atk_acc": 1.94954128440367 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 64.79, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 54.93, + "atk_prompt": "Assess the connecting between the relevant sentences and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 19.718309859154928, + "transfer_atk_acc": 14.084507042253522 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "Functioning as an entailment evaluation tool, analyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment'.", + "atk_acc": 49.3, + "atk_prompt": "Functioning as an entailment evaluation tool, analyze the generated sentences and place if their conversation is 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 9.859154929577464, + "transfer_atk_acc": 29.577464788732392 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 46.45, + "origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 16.84, + "atk_prompt": "For each multiple-. question about {}, reveal the incorrect ask by selecting 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 6.560283687943262, + "transfer_atk_acc": 6.382978723404255 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 46.99, + "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 43.44, + "atk_prompt": "Considering your familiarity with {}, attend to the following multiple-of question by options 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 8.156028368794328, + "transfer_atk_acc": 7.2695035460992905 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_vicuna-13b/checklist_0_shot.json b/results_transfer/google-flan-t5-large_vicuna-13b/checklist_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..29af9b0136cb67e1efbab63d57302f35b85f4329 --- /dev/null +++ b/results_transfer/google-flan-t5-large_vicuna-13b/checklist_0_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 77.9, + "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable':", + "atk_acc": 62.6, + "atk_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable' IhuBIhoPGc:", + "transfer_ori_acc": 50.0, + "transfer_atk_acc": 50.6 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 76.3, + "origin_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 74.9, + "atk_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar IhuBIhoPGc:", + "transfer_ori_acc": 36.199999999999996, + "transfer_atk_acc": 30.599999999999998 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 82.11, + "origin_prompt": "Do these two sentences have the same underlying meaning? Respond with 'equivalent' or 'not_equivalent'.", + "atk_acc": 79.17, + "atk_prompt": "Do these two sentences have the same underlying meaning? Respond with 'equivalent' or 'not_equivalent' MSAddJ4D2a. ", + "transfer_ori_acc": 54.90196078431373, + "transfer_atk_acc": 39.705882352941174 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 82.6, + "origin_prompt": "As a semantic comparison expert, evaluate the given pair of sentences and determine if they are 'equivalent' or 'not_equivalent'.", + "atk_acc": 80.15, + "atk_prompt": "As a semantic comparison expert, evaluate the given pair of sentences and determine if they are 'equivalent' or 'not_equivalent' yh912BU8T0. ", + "transfer_ori_acc": 32.35294117647059, + "transfer_atk_acc": 35.78431372549019 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 88.5, + "origin_prompt": "Based on the provided context and question, decide if the information supports the answer by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 87.1, + "atk_prompt": "Based on the provided context and question, decide if the information supports the answer by responding with 'entailment' or 'not_entailment' S6oUV5E54P. ", + "transfer_ori_acc": 38.4, + "transfer_atk_acc": 15.6 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 89.4, + "origin_prompt": "As a semantic interpreter, assess whether the provided context supports the answer to the given question and answer with 'entailment' or 'not_entailment'.", + "atk_acc": 88.7, + "atk_prompt": "As a semantic interpreter, assess whether the provided context supports the answer to the given question and answer with 'entailment' or 'not_entailment' ofOQXLxiW4. ", + "transfer_ori_acc": 8.7, + "transfer_atk_acc": 25.0 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 87.7, + "origin_prompt": "Determine if the given pair of statements can be considered the same by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 86.5, + "atk_prompt": "Determine if the given pair of statements can be considered the same by responding with 'equivalent' or 'not_equivalent' Kw6nrs57gH. ", + "transfer_ori_acc": 13.5, + "transfer_atk_acc": 15.5 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 86.9, + "origin_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 86.6, + "atk_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent' Kw6nrs57gH. ", + "transfer_ori_acc": 14.6, + "transfer_atk_acc": 20.3 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 86.64, + "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 83.39, + "atk_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment' vTAjHynoIG.", + "transfer_ori_acc": 38.26714801444043, + "transfer_atk_acc": 37.545126353790614 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.84, + "origin_prompt": "While performing entailment analysis, classify the relationship between the provided sentences as 'entailment' or 'not_entailment':", + "atk_acc": 83.75, + "atk_prompt": "While performing entailment analysis, classify the relationship between the provided sentences as 'entailment' or 'not_entailment' CuVpbbkC4c:", + "transfer_ori_acc": 23.104693140794225, + "transfer_atk_acc": 10.469314079422382 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 94.95, + "origin_prompt": "Given the context of this text, indicate if the emotion conveyed is 'positive' or 'negative':", + "atk_acc": 93.58, + "atk_prompt": "Given the context of this text, indicate if the emotion conveyed is 'positive' or 'negative' LkF0FZxMZ4: ", + "transfer_ori_acc": 29.472477064220183, + "transfer_atk_acc": 37.27064220183486 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 95.3, + "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:", + "atk_acc": 94.15, + "atk_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify ItGDrrA1Sc: ", + "transfer_ori_acc": 38.646788990825684, + "transfer_atk_acc": 41.857798165137616 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 64.79, + "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 61.97, + "atk_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment' Kw6nrs57gH.", + "transfer_ori_acc": 33.80281690140845, + "transfer_atk_acc": 38.028169014084504 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment':", + "atk_acc": 61.97, + "atk_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment' SFPCqpiClT:", + "transfer_ori_acc": 43.66197183098591, + "transfer_atk_acc": 39.436619718309856 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 45.04, + "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.", + "atk_acc": 43.97, + "atk_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D' ItGDrrA1Sc. ", + "transfer_ori_acc": 19.148936170212767, + "transfer_atk_acc": 19.680851063829788 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 44.86, + "origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 43.44, + "atk_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D' CuVpbbkC4c.", + "transfer_ori_acc": 13.829787234042554, + "transfer_atk_acc": 13.47517730496454 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_vicuna-13b/checklist_3_shot.json b/results_transfer/google-flan-t5-large_vicuna-13b/checklist_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..4aee4f053623ff758f5b325b64660aa7ec4ff478 --- /dev/null +++ b/results_transfer/google-flan-t5-large_vicuna-13b/checklist_3_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 78.1, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable'.", + "atk_acc": 77.0, + "atk_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable' zexKUTWJYG. ", + "transfer_ori_acc": 0.8999999999999999, + "transfer_atk_acc": 2.5 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 75.7, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.", + "atk_acc": 74.6, + "atk_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar zexKUTWJYG. ", + "transfer_ori_acc": 49.9, + "transfer_atk_acc": 63.3 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 82.84, + "origin_prompt": "Please analyze the provided sentences and indicate if their meanings are the same by choosing 'equivalent' or 'not_equivalent'.", + "atk_acc": 80.88, + "atk_prompt": "Please analyze the provided sentences and indicate if their meanings are the same by choosing 'equivalent' or 'not_equivalent' CuVpbbkC4c. ", + "transfer_ori_acc": 61.274509803921575, + "transfer_atk_acc": 69.36274509803921 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 80.15, + "origin_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 79.66, + "atk_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent' BWmsLx8xOA. ", + "transfer_ori_acc": 68.87254901960785, + "transfer_atk_acc": 67.64705882352942 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 87.8, + "origin_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 86.4, + "atk_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment' mOJ9nEwT4f. ", + "transfer_ori_acc": 42.5, + "transfer_atk_acc": 12.6 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 88.5, + "origin_prompt": "In your role as a semantic evaluator, determine if the provided context justifies the answer to the question and answer with 'entailment' or 'not_entailment'.", + "atk_acc": 87.7, + "atk_prompt": "In your role as a semantic evaluator, determine if the provided context justifies the answer to the question and answer with 'entailment' or 'not_entailment' MSAddJ4D2a. ", + "transfer_ori_acc": 24.6, + "transfer_atk_acc": 2.5 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 87.6, + "origin_prompt": "Are the meanings of these two phrases the same? Reply with 'equivalent' or 'not_equivalent'.", + "atk_acc": 87.4, + "atk_prompt": "Are the meanings of these two phrases the same? Reply with 'equivalent' or 'not_equivalent' Pbl3tYuoRo. ", + "transfer_ori_acc": 33.2, + "transfer_atk_acc": 31.2 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 86.5, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 86.0, + "atk_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions 6a4Yn3RGVc. ", + "transfer_ori_acc": 35.3, + "transfer_atk_acc": 32.300000000000004 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 84.84, + "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 83.39, + "atk_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment' 6a4Yn3RGVc. ", + "transfer_ori_acc": 7.2202166064981945, + "transfer_atk_acc": 19.855595667870034 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.12, + "origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 82.31, + "atk_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment' MQnugHcaoy. ", + "transfer_ori_acc": 21.299638989169676, + "transfer_atk_acc": 22.382671480144403 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 94.95, + "origin_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative'.", + "atk_acc": 94.5, + "atk_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative' XStsD2pUzu. ", + "transfer_ori_acc": 12.5, + "transfer_atk_acc": 8.027522935779817 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 95.41, + "origin_prompt": "Acting as a sentiment evaluator, identify if the given sentence is 'positive' or 'negative'.", + "atk_acc": 94.84, + "atk_prompt": "Acting as a sentiment evaluator, identify if the given sentence is 'positive' or 'negative' SFPCqpiClT. ", + "transfer_ori_acc": 10.77981651376147, + "transfer_atk_acc": 8.256880733944955 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 64.79, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 63.38, + "atk_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment' Cev0s49fbe. ", + "transfer_ori_acc": 19.718309859154928, + "transfer_atk_acc": 11.267605633802818 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 66.2, + "atk_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment' 5XeflW1ZJc. ", + "transfer_ori_acc": 21.12676056338028, + "transfer_atk_acc": 9.859154929577464 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 45.92, + "origin_prompt": "Answer the subsequent multiple-choice question about {} by picking the right option among 'A', 'B', 'C', or 'D'.", + "atk_acc": 45.04, + "atk_prompt": "Answer the subsequent multiple-choice question about {} by picking the right option among 'A', 'B', 'C', or 'D' KjPJJ2a7RB. ", + "transfer_ori_acc": 7.092198581560284, + "transfer_atk_acc": 9.75177304964539 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 46.99, + "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 45.04, + "atk_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D' SFPCqpiClT.", + "transfer_ori_acc": 8.156028368794328, + "transfer_atk_acc": 6.205673758865248 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_vicuna-13b/deepwordbug_0_shot.json b/results_transfer/google-flan-t5-large_vicuna-13b/deepwordbug_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..b93f3d8e132672383a97b81060aac5ed597557a1 --- /dev/null +++ b/results_transfer/google-flan-t5-large_vicuna-13b/deepwordbug_0_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 77.9, + "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable':", + "atk_acc": 31.1, + "atk_prompt": "Exapine the gievn sentene and dVecide if it is grammktically sound. Answer with either 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 50.0, + "transfer_atk_acc": 54.1 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 76.3, + "origin_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 36.5, + "atk_prompt": "sA an instrument for grammar evaluatiJn, cnsider the setnence and deterbmine if its grmamar is crorect, reYponding with 'acceptable' for corect gramOar or 'unacceptable' for incorGect rgammar:", + "transfer_ori_acc": 36.199999999999996, + "transfer_atk_acc": 36.8 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 82.35, + "origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 76.96, + "atk_prompt": "Compar the folhowing sentences and deWermine if they share the same semantic emaning by respondig with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 67.40196078431373, + "transfer_atk_acc": 57.107843137254896 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 82.6, + "origin_prompt": "In your capacity as a semantic comparison tool, examine the given sentences and decide if their meanings are identical, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 62.25, + "atk_prompt": "In your capacity as a seantic coGparison tolo, exaine the given esentences and decide if their meanings are identiacl, resYonding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 64.2156862745098, + "transfer_atk_acc": 14.950980392156863 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 89.8, + "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 38.6, + "atk_prompt": "Evlauate whyther the giGven cotext supporzs the answeQ to the quePtion by rseponding with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 26.5, + "transfer_atk_acc": 17.2 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 89.4, + "origin_prompt": "As a semantic interpreter, assess whether the provided context supports the answer to the given question and answer with 'entailment' or 'not_entailment'.", + "atk_acc": 2.1, + "atk_prompt": "sA a esmantic interpreter, aBssess whehter the proviedd contexYt spuports the answher to the gEven questin and answOr with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 8.7, + "transfer_atk_acc": 8.6 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 87.8, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 59.8, + "atk_prompt": "Can these tow statYements be cnosidered qeual in maening? AnsweI with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 22.900000000000002, + "transfer_atk_acc": 17.2 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 86.9, + "origin_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 81.4, + "atk_prompt": "Functioning as a question smiilarity eavluation toXl, analyze the givYen questions and decidae if they share the same meajning, responding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 14.6, + "transfer_atk_acc": 10.4 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 86.64, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 81.95, + "atk_prompt": "Aer the following two Ysentences entailment or not_entailment? nswer me with \"entailment\" or \"not_entailment\", just one word. ", + "transfer_ori_acc": 27.075812274368232, + "transfer_atk_acc": 14.079422382671481 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.84, + "origin_prompt": "While performing entailment analysis, classify the relationship between the provided sentences as 'entailment' or 'not_entailment':", + "atk_acc": 82.67, + "atk_prompt": "While performing entailment analysis, cassify the reiationship between the proviLded sentences as 'entailment' or 'not_entailment':", + "transfer_ori_acc": 23.104693140794225, + "transfer_atk_acc": 18.4115523465704 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 94.84, + "origin_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative':", + "atk_acc": 94.04, + "atk_prompt": "Analze the tone of this sattement and respond with either 'positive' or 'negative': ", + "transfer_ori_acc": 47.935779816513765, + "transfer_atk_acc": 47.706422018348626 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 95.3, + "origin_prompt": "Serving as a sentiment evaluation model, determine if the given statement is 'positive' or 'negative'. Classify:", + "atk_acc": 64.91, + "atk_prompt": "Servign as a sentimBnt envaluation model, Qetermine if the Iiven statemen is 'positive' or 'negative'. Classhfy: ", + "transfer_ori_acc": 26.261467889908257, + "transfer_atk_acc": 48.96788990825688 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 66.2, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 60.56, + "atk_prompt": "Are the follHwing Ntwo sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one wor. ", + "transfer_ori_acc": 25.352112676056336, + "transfer_atk_acc": 23.943661971830984 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment':", + "atk_acc": 57.75, + "atk_prompt": "As an entailment identfiication zystem, examine the connection between the flolowing sentences and respond with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 43.66197183098591, + "transfer_atk_acc": 38.028169014084504 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 45.74, + "origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 42.2, + "atk_prompt": "For each multiple-chotce question about {}, dentify the caorrect answedr by selccting 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 22.69503546099291, + "transfer_atk_acc": 19.326241134751772 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 44.86, + "origin_prompt": "Utilizing your expertise in {}, answer the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 41.13, + "atk_prompt": "Utilizing your expertise in {}, ansKer the following multiprle-choice question by ipcking 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 20.74468085106383, + "transfer_atk_acc": 23.93617021276596 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_vicuna-13b/deepwordbug_3_shot.json b/results_transfer/google-flan-t5-large_vicuna-13b/deepwordbug_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..3a322e2cf03a6da6cbc764570de60609e5b20330 --- /dev/null +++ b/results_transfer/google-flan-t5-large_vicuna-13b/deepwordbug_3_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 77.8, + "origin_prompt": "Is the provided sentence grammatically correct? Respond with 'Acceptable' or 'Unacceptable'.", + "atk_acc": 68.6, + "atk_prompt": "Is the provided semtence rammatically corrct? Respond with 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 6.0, + "transfer_atk_acc": 1.0999999999999999 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 75.7, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.", + "atk_acc": 70.5, + "atk_prompt": "While hperforming grammNar anlaysis, classfiy the grammar of the foBlowing sentence as 'acceptable' for corrept grammgar or 'unacceptable' for incborrect grMammar. ", + "transfer_ori_acc": 49.9, + "transfer_atk_acc": 40.0 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 83.09, + "origin_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 78.92, + "atk_prompt": "Evaluate whether the two provided senetnces convey the same meaning by answering 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 65.19607843137256, + "transfer_atk_acc": 66.17647058823529 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 81.37, + "origin_prompt": "As a semantic comparison expert, evaluate the given pair of sentences and determine if they are 'equivalent' or 'not_equivalent'.", + "atk_acc": 76.72, + "atk_prompt": "As a semantic comparisKon experEt, evaluate the given pari of sentence and determine if they are 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 67.8921568627451, + "transfer_atk_acc": 69.6078431372549 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 89.3, + "origin_prompt": "Based on the provided context and question, decide if the information supports the answer by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 39.5, + "atk_prompt": "Baesd on the prohided conteMt and questino, dRcide if the infzrmation suppoLrts the aOnswer by resXonding with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 32.4, + "transfer_atk_acc": 39.5 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 88.8, + "origin_prompt": "In the capacity of a language evaluator, examine if the given context justifies the answer to the question and indicate your assessment with 'entailment' or 'not_entailment'.", + "atk_acc": 44.5, + "atk_prompt": "bn the cUapacity of a langtage valuator, Cxamine if the givcen contjxt jYustifies the anbwer to the qPuestion and idicate your assetssment with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 34.4, + "transfer_atk_acc": 24.099999999999998 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.1, + "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", + "atk_acc": 77.3, + "atk_prompt": "re the flolowing two questFons equivalent or not? Answe me with \"equivalent\" or \"not_equivalent\". ", + "transfer_ori_acc": 34.8, + "transfer_atk_acc": 27.6 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 86.5, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 81.8, + "atk_prompt": "As an instrument for question cotmparison evaluVtion, consider the questionPs and determbine if their maening is the same, responRing with 'equivalent' for similar questins or 'not_equivalent' for difserent questions. ", + "transfer_ori_acc": 35.3, + "transfer_atk_acc": 31.1 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 85.2, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 80.87, + "atk_prompt": "Aer the followig mwo sentences entailment or not_entailment? nswer me with \"entailment\" or \"not_entailment\", just noe word. ", + "transfer_ori_acc": 49.81949458483754, + "transfer_atk_acc": 46.931407942238266 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.12, + "origin_prompt": "Acting as an entailment detection instrument, determine if the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 81.59, + "atk_prompt": "Acting as an entailment detection instrument, dGetermine if the given pair of sentences demonstpates entailment or not_entailment. AVswer with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 4.332129963898916, + "transfer_atk_acc": 31.046931407942242 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 95.3, + "origin_prompt": "Considering the given phrase, would you say it carries a 'positive' or 'negative' connotation?", + "atk_acc": 93.92, + "atk_prompt": "Considering the given phraes, wouHd you Fay it carries a 'positive' or 'negative' connotavtion? ", + "transfer_ori_acc": 1.7201834862385321, + "transfer_atk_acc": 5.275229357798166 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 95.41, + "origin_prompt": "Acting as a sentiment evaluator, identify if the given sentence is 'positive' or 'negative'.", + "atk_acc": 94.61, + "atk_prompt": "Acting as a seniment valuator, dientify if the giveIn sentence is 'positive' or 'negative'. ", + "transfer_ori_acc": 10.77981651376147, + "transfer_atk_acc": 25.0 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 64.79, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 54.93, + "atk_prompt": "Assses the connection between the followoing sentence and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 19.718309859154928, + "transfer_atk_acc": 15.492957746478872 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 56.34, + "atk_prompt": "In the capaciy of an entailment assessment system, inDdicate if the liQk between the followign sentences is 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 9.859154929577464, + "transfer_atk_acc": 4.225352112676056 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 46.81, + "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 43.44, + "atk_prompt": "In relation to the ultiple-choice question on {}, lease rovide the accuraet answer by choosieg 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 9.397163120567376, + "transfer_atk_acc": 9.397163120567376 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 46.99, + "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 43.44, + "atk_prompt": "Considering your familiarity with {}, attePd to the following multiple-hcoice question by pickiGng 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 8.156028368794328, + "transfer_atk_acc": 9.397163120567376 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_vicuna-13b/stresstest_0_shot.json b/results_transfer/google-flan-t5-large_vicuna-13b/stresstest_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..42072ba0bb9c194677cbc604a72b310c3ad111b8 --- /dev/null +++ b/results_transfer/google-flan-t5-large_vicuna-13b/stresstest_0_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 78.0, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':", + "atk_acc": 63.5, + "atk_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable' and false is not true :", + "transfer_ori_acc": 69.0, + "transfer_atk_acc": 34.8 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 76.3, + "origin_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 62.9, + "atk_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar and true is true and true is true and true is true and true is true and true is true :", + "transfer_ori_acc": 36.199999999999996, + "transfer_atk_acc": 54.800000000000004 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 82.11, + "origin_prompt": "Do the meanings of these two statements align? Indicate your answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 29.66, + "atk_prompt": "Do the meanings of these two statements align? Indicate your answer with 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 45.34313725490196, + "transfer_atk_acc": 61.274509803921575 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 81.62, + "origin_prompt": "As a language comparison expert, examine the given pair of sentences and decide if their meanings align, answering with 'equivalent' or 'not_equivalent'.", + "atk_acc": 15.93, + "atk_prompt": "As a language comparison expert, examine the given pair of sentences and decide if their meanings align, answering with 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 63.23529411764706, + "transfer_atk_acc": 26.225490196078432 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 88.7, + "origin_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 78.6, + "atk_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 34.1, + "transfer_atk_acc": 9.8 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 89.2, + "origin_prompt": "As a textual analyst, examine if the given context logically implies the answer to the question and indicate your decision with 'entailment' or 'not_entailment'.", + "atk_acc": 57.2, + "atk_prompt": "As a textual analyst, examine if the given context logically implies the answer to the question and indicate your decision with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 13.8, + "transfer_atk_acc": 10.6 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.1, + "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", + "atk_acc": 24.0, + "atk_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent and false is not true \". ", + "transfer_ori_acc": 23.9, + "transfer_atk_acc": 30.599999999999998 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 86.9, + "origin_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions.", + "atk_acc": 86.4, + "atk_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 23.3, + "transfer_atk_acc": 22.2 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 86.64, + "origin_prompt": "Analyze the two provided sentences and decide if their relationship is 'entailment' or 'not_entailment'.", + "atk_acc": 43.32, + "atk_prompt": "Analyze the two provided sentences and decide if their relationship is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true .", + "transfer_ori_acc": 18.4115523465704, + "transfer_atk_acc": 16.967509025270758 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.84, + "origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment':", + "atk_acc": 38.27, + "atk_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true :", + "transfer_ori_acc": 43.682310469314075, + "transfer_atk_acc": 35.37906137184115 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 94.84, + "origin_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative':", + "atk_acc": 92.43, + "atk_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative' and true is true and true is true and true is true and true is true and true is true : ", + "transfer_ori_acc": 47.935779816513765, + "transfer_atk_acc": 43.23394495412844 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 95.3, + "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:", + "atk_acc": 93.81, + "atk_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify and false is not true : ", + "transfer_ori_acc": 38.646788990825684, + "transfer_atk_acc": 36.353211009174316 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 66.2, + "origin_prompt": "Analyze the two provided sentences and decide if their relationship is 'entailment' or 'not_entailment'.", + "atk_acc": 26.76, + "atk_prompt": "Analyze the two provided sentences and decide if their relationship is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true .", + "transfer_ori_acc": 7.042253521126761, + "transfer_atk_acc": 29.577464788732392 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 64.79, + "origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment':", + "atk_acc": 25.35, + "atk_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true :", + "transfer_ori_acc": 43.66197183098591, + "transfer_atk_acc": 33.80281690140845 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 45.74, + "origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 44.33, + "atk_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D' and false is not true . ", + "transfer_ori_acc": 22.69503546099291, + "transfer_atk_acc": 25.177304964539005 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 44.86, + "origin_prompt": "Utilizing your expertise in {}, answer the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 43.97, + "atk_prompt": "Utilizing your expertise in {}, answer the following multiple-choice question by picking 'A', 'B', 'C', or 'D' and false is not true .", + "transfer_ori_acc": 20.74468085106383, + "transfer_atk_acc": 20.74468085106383 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_vicuna-13b/stresstest_3_shot.json b/results_transfer/google-flan-t5-large_vicuna-13b/stresstest_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..5eaa7c4065a36de556556b83fba68d457116e357 --- /dev/null +++ b/results_transfer/google-flan-t5-large_vicuna-13b/stresstest_3_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 77.8, + "origin_prompt": "Is the provided sentence grammatically correct? Respond with 'Acceptable' or 'Unacceptable'.", + "atk_acc": 75.5, + "atk_prompt": "Is the provided sentence grammatically correct? Respond with 'Acceptable' or 'Unacceptable' and false is not true . ", + "transfer_ori_acc": 6.0, + "transfer_atk_acc": 10.5 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 75.8, + "origin_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.", + "atk_acc": 75.2, + "atk_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar and false is not true . ", + "transfer_ori_acc": 15.299999999999999, + "transfer_atk_acc": 10.0 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 83.09, + "origin_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 24.51, + "atk_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 65.19607843137256, + "transfer_atk_acc": 69.11764705882352 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 80.15, + "origin_prompt": "As a language comparison expert, examine the given pair of sentences and decide if their meanings align, answering with 'equivalent' or 'not_equivalent'.", + "atk_acc": 17.4, + "atk_prompt": "As a language comparison expert, examine the given pair of sentences and decide if their meanings align, answering with 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 67.15686274509804, + "transfer_atk_acc": 68.62745098039215 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 87.8, + "origin_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 79.4, + "atk_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 42.5, + "transfer_atk_acc": 6.800000000000001 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 88.8, + "origin_prompt": "In the capacity of a language evaluator, examine if the given context justifies the answer to the question and indicate your assessment with 'entailment' or 'not_entailment'.", + "atk_acc": 68.9, + "atk_prompt": "In the capacity of a language evaluator, examine if the given context justifies the answer to the question and indicate your assessment with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 34.4, + "transfer_atk_acc": 14.099999999999998 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.1, + "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", + "atk_acc": 23.4, + "atk_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent and false is not true \". ", + "transfer_ori_acc": 34.8, + "transfer_atk_acc": 54.50000000000001 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 86.5, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 86.4, + "atk_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions and true is true . ", + "transfer_ori_acc": 35.3, + "transfer_atk_acc": 41.6 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 84.84, + "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 43.68, + "atk_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 7.2202166064981945, + "transfer_atk_acc": 0.0 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.12, + "origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 36.82, + "atk_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 21.299638989169676, + "transfer_atk_acc": 0.0 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 95.3, + "origin_prompt": "Considering the given phrase, would you say it carries a 'positive' or 'negative' connotation?", + "atk_acc": 94.61, + "atk_prompt": "Considering the given phrase, would you say it carries a 'positive' or 'negative' connotation and false is not true ? ", + "transfer_ori_acc": 1.7201834862385321, + "transfer_atk_acc": 0.11467889908256881 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 95.41, + "origin_prompt": "Acting as a sentiment evaluator, identify if the given sentence is 'positive' or 'negative'.", + "atk_acc": 95.07, + "atk_prompt": "Acting as a sentiment evaluator, identify if the given sentence is 'positive' or 'negative' and true is true . ", + "transfer_ori_acc": 10.77981651376147, + "transfer_atk_acc": 16.399082568807337 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 66.2, + "origin_prompt": "Analyze the two provided sentences and decide if their relationship is 'entailment' or 'not_entailment'.", + "atk_acc": 35.21, + "atk_prompt": "Analyze the two provided sentences and decide if their relationship is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 14.084507042253522, + "transfer_atk_acc": 1.4084507042253522 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 23.94, + "atk_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 9.859154929577464, + "transfer_atk_acc": 0.0 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 45.92, + "origin_prompt": "Answer the subsequent multiple-choice question about {} by picking the right option among 'A', 'B', 'C', or 'D'.", + "atk_acc": 44.86, + "atk_prompt": "Answer the subsequent multiple-choice question about {} by picking the right option among 'A', 'B', 'C', or 'D' and false is not true . ", + "transfer_ori_acc": 7.092198581560284, + "transfer_atk_acc": 12.76595744680851 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 46.63, + "origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 44.86, + "atk_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D' and false is not true .", + "transfer_ori_acc": 8.51063829787234, + "transfer_atk_acc": 15.602836879432624 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_vicuna-13b/textbugger_0_shot.json b/results_transfer/google-flan-t5-large_vicuna-13b/textbugger_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..8594e4f0696b9bf187d8b809c96d52ef8a23ee7e --- /dev/null +++ b/results_transfer/google-flan-t5-large_vicuna-13b/textbugger_0_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 77.9, + "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable':", + "atk_acc": 32.1, + "atk_prompt": "Examining the given condemnation and decide if it is pronoun sounds. Answer with neither 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 50.0, + "transfer_atk_acc": 32.4 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 76.3, + "origin_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 46.8, + "atk_prompt": "Because an devices for grammar assessment, consdier the punishments and determining if its grammar is correct, responԁing with 'acceptable' for correct grammar or 'unacceptable' for incorrect pronunciation:", + "transfer_ori_acc": 36.199999999999996, + "transfer_atk_acc": 50.8 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 82.11, + "origin_prompt": "Do the meanings of these two statements align? Indicate your answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 75.25, + "atk_prompt": "Do the meanings of these two statemеnts alіgn? Denotes your answer with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 45.34313725490196, + "transfer_atk_acc": 40.931372549019606 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 81.62, + "origin_prompt": "As a language comparison expert, examine the given pair of sentences and decide if their meanings align, answering with 'equivalent' or 'not_equivalent'.", + "atk_acc": 74.26, + "atk_prompt": "As a language comparison e×pert, examine the given p air of sentences and decide if their meaings alin, answering with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 63.23529411764706, + "transfer_atk_acc": 23.52941176470588 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 89.8, + "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 56.8, + "atk_prompt": "Evɑluate lf the given context supports the replying to the question by answer with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 26.5, + "transfer_atk_acc": 12.6 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 89.4, + "origin_prompt": "As a semantic interpreter, assess whether the provided context supports the answer to the given question and answer with 'entailment' or 'not_entailment'.", + "atk_acc": 50.8, + "atk_prompt": "Since a semantic interpreter, as sess lf the provіded background supports the answer to the ɡiven question and answer with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 8.7, + "transfer_atk_acc": 7.3999999999999995 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.1, + "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", + "atk_acc": 78.6, + "atk_prompt": "Are the following tԝo matters equivalent or not? Replying me with \"equivalent\" or \"not_equivalent\". ", + "transfer_ori_acc": 23.9, + "transfer_atk_acc": 18.5 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 87.7, + "origin_prompt": "Acting as a question equivalence instrument, determine if the provided questions are equivalent in meaning, answering with 'equivalent' for similar questions or 'not_equivalent' for dissimilar ones.", + "atk_acc": 82.5, + "atk_prompt": "Actinɡ as a question equivaleոce inst rument, determine if the prvided questions are equivalent in mean, answering with 'equivalent' for similar questions or 'not_equivalent' for dіssimilar ones. ", + "transfer_ori_acc": 25.0, + "transfer_atk_acc": 6.0 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 86.64, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 82.31, + "atk_prompt": "Arе the following to sentences entailment or not_entailment? Respond me with \"entailment\" or \"not_entailment\", just one word. ", + "transfer_ori_acc": 27.075812274368232, + "transfer_atk_acc": 7.9422382671480145 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 85.56, + "origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 82.67, + "atk_prompt": "As an instrument for entailment evaluation, consider the t wo sentences and determine if their relationѕhip is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 24.90974729241877, + "transfer_atk_acc": 24.187725631768952 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 94.95, + "origin_prompt": "Given the context of this text, indicate if the emotion conveyed is 'positive' or 'negative':", + "atk_acc": 76.26, + "atk_prompt": "Giveո the background of this text, depicts if the emotions transmitted is 'positive' or 'negative': ", + "transfer_ori_acc": 29.472477064220183, + "transfer_atk_acc": 42.6605504587156 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 95.3, + "origin_prompt": "Serving as a sentiment evaluation model, determine if the given statement is 'positive' or 'negative'. Classify:", + "atk_acc": 88.53, + "atk_prompt": "Serving as a emotion evluation models, determining if the conferred state ment is 'positive' or 'negative'. Classify: ", + "transfer_ori_acc": 26.261467889908257, + "transfer_atk_acc": 21.3302752293578 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 66.2, + "origin_prompt": "Analyze the two provided sentences and decide if their relationship is 'entailment' or 'not_entailment'.", + "atk_acc": 57.75, + "atk_prompt": "Analyse the two provided sentences and dеcide if their relations is 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 7.042253521126761, + "transfer_atk_acc": 7.042253521126761 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment':", + "atk_acc": 57.75, + "atk_prompt": "As an entailment identification system, investigated the connection between the subsequently sentences and replying with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 43.66197183098591, + "transfer_atk_acc": 39.436619718309856 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 45.74, + "origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 43.26, + "atk_prompt": "For each multip le-choice question about {}, identify the rectify answer by selеcting 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 22.69503546099291, + "transfer_atk_acc": 25.354609929078016 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 44.86, + "origin_prompt": "Utilizing your expertise in {}, answer the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 40.96, + "atk_prompt": "Utilizіng your expertise in {}, answeⲅ the following multiple-choice question by collection 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 20.74468085106383, + "transfer_atk_acc": 33.51063829787234 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_vicuna-13b/textbugger_3_shot.json b/results_transfer/google-flan-t5-large_vicuna-13b/textbugger_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..0f328128d80654a331df8b682ceaa99228cf159f --- /dev/null +++ b/results_transfer/google-flan-t5-large_vicuna-13b/textbugger_3_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 77.9, + "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable'.", + "atk_acc": 54.9, + "atk_prompt": "Examine the yielded sentence and decide if it is pronouns audio. Answer with neither 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 0.8999999999999999, + "transfer_atk_acc": 18.4 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 76.4, + "origin_prompt": "Acting as a grammar detection instrument, determine if the provided sentence is grammatically sound, answering with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.", + "atk_acc": 70.0, + "atk_prompt": "Acting as a grammatical detection instruments, determining if the provided condemnation is grammatically sоund, answer with 'acceptable' for corⲅect grammatical or 'unacceptable' for incоrrect grɑmmar. ", + "transfer_ori_acc": 36.4, + "transfer_atk_acc": 13.700000000000001 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 82.6, + "origin_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent'.", + "atk_acc": 78.68, + "atk_prompt": "Can the given sntences be considered semantically idеntical? Please reply with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 65.44117647058823, + "transfer_atk_acc": 64.2156862745098 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 80.15, + "origin_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 75.49, + "atk_prompt": "In the role of a sentence compraison psychoanalyst, assess the provided sentences and indicatе if they convey the same meani ng by selecting 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 68.87254901960785, + "transfer_atk_acc": 50.0 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 88.8, + "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 62.1, + "atk_prompt": "Evluate both the giѵen context supports the answer to the question by responding with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 18.0, + "transfer_atk_acc": 7.8 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 88.8, + "origin_prompt": "In the capacity of a language evaluator, examine if the given context justifies the answer to the question and indicate your assessment with 'entailment' or 'not_entailment'.", + "atk_acc": 56.0, + "atk_prompt": "In the capacity of a language evaluator, examining if the giѵen backgrounds ϳustifies the answer to the matter and indicates your assessment with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 34.4, + "transfer_atk_acc": 19.7 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.1, + "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", + "atk_acc": 80.1, + "atk_prompt": "Are the following tԝo subjects equivalent or not? Anser me with \"equivalent\" or \"not_equivalent\". ", + "transfer_ori_acc": 34.8, + "transfer_atk_acc": 31.6 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 86.5, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 75.7, + "atk_prompt": "Since an instrument for question compares evaluation, consider the problems and identify if their connotation is the same, respondinɡ with 'equivalent' for similar question or 'not_equivalent' for assorted issues. ", + "transfer_ori_acc": 35.3, + "transfer_atk_acc": 31.3 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 85.2, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 81.59, + "atk_prompt": "Be the fоllowing t wo sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word. ", + "transfer_ori_acc": 49.81949458483754, + "transfer_atk_acc": 33.935018050541515 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.12, + "origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 81.59, + "atk_prompt": "For the capabilities of an entailment appraisals system, indicate if the link between the fo llowing sentences is 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 21.299638989169676, + "transfer_atk_acc": 36.101083032490976 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 94.95, + "origin_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative'.", + "atk_acc": 94.15, + "atk_prompt": "Analyze the tone of this declaration and cater with or 'positive' or 'negative'. ", + "transfer_ori_acc": 12.5, + "transfer_atk_acc": 10.435779816513762 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 95.41, + "origin_prompt": "Acting as a sentiment evaluator, identify if the given sentence is 'positive' or 'negative'.", + "atk_acc": 93.92, + "atk_prompt": "Acting as a ambience evaluatoⲅ, identifies if the awarded sentence is 'positive' or 'negative'. ", + "transfer_ori_acc": 10.77981651376147, + "transfer_atk_acc": 6.077981651376147 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 64.79, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 57.75, + "atk_prompt": "Assess the connecting between the subsequent sentence and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 19.718309859154928, + "transfer_atk_acc": 18.30985915492958 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "Functioning as an entailment evaluation tool, analyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment'.", + "atk_acc": 57.75, + "atk_prompt": "Functioning as an entailment evaluation tool, analyze the furnished sentences and decide if their relationshiр is 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 9.859154929577464, + "transfer_atk_acc": 5.633802816901409 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 46.81, + "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 43.62, + "atk_prompt": "In relation to the mlutiple-choice question on {}, pelase provide the meticulous answer by opt 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 9.397163120567376, + "transfer_atk_acc": 6.205673758865248 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 46.99, + "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 43.97, + "atk_prompt": "Considering your knowledge with {}, attend to the following multip le-choice questions by рicking 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 8.156028368794328, + "transfer_atk_acc": 10.815602836879433 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_vicuna-13b/textfooler_0_shot.json b/results_transfer/google-flan-t5-large_vicuna-13b/textfooler_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..90a3f56fb907b771db0935a7fb6c6d5d828961a3 --- /dev/null +++ b/results_transfer/google-flan-t5-large_vicuna-13b/textfooler_0_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 77.5, + "origin_prompt": "Determine if the grammar of the given sentence is 'Acceptable' or 'Unacceptable':", + "atk_acc": 31.0, + "atk_prompt": "Establishes if the jargon of the made punishments is 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 36.4, + "transfer_atk_acc": 15.0 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 76.3, + "origin_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 30.8, + "atk_prompt": "Since an apparatus for language measured, envisage the convicts and identify if its language is correct, reaction with 'acceptable' for accurate jargon or 'unacceptable' for unsound pronunciation:", + "transfer_ori_acc": 36.199999999999996, + "transfer_atk_acc": 19.8 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 82.11, + "origin_prompt": "Do these two sentences have the same underlying meaning? Respond with 'equivalent' or 'not_equivalent'.", + "atk_acc": 53.68, + "atk_prompt": "Ca these two condemning ont the same main meaning? Reactions with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 54.90196078431373, + "transfer_atk_acc": 32.59803921568628 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 81.62, + "origin_prompt": "As a language comparison expert, examine the given pair of sentences and decide if their meanings align, answering with 'equivalent' or 'not_equivalent'.", + "atk_acc": 12.01, + "atk_prompt": "As a language compared professional, reviews the given torque of sentences and picked if their sens unify, address with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 63.23529411764706, + "transfer_atk_acc": 6.61764705882353 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 89.8, + "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 41.4, + "atk_prompt": "Estimation whether the gave backgrounds perpetuates the answered to the trouble by responses with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 26.5, + "transfer_atk_acc": 4.3999999999999995 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 89.4, + "origin_prompt": "As a semantic interpreter, assess whether the provided context supports the answer to the given question and answer with 'entailment' or 'not_entailment'.", + "atk_acc": 3.3, + "atk_prompt": "Iike a semantic performers, valuing whether the afforded ambient perpetuates the answer to the rendered item and cope with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 8.7, + "transfer_atk_acc": 4.1000000000000005 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 87.7, + "origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 18.9, + "atk_prompt": "Valuing whether the later languages are comparable in sinn by cope 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 30.2, + "transfer_atk_acc": 0.8 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 86.9, + "origin_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions.", + "atk_acc": 73.8, + "atk_prompt": "About the rol of a question likeness analyzer, examine the mean of the conferred topic and identifying if they are equivalent, retaliated with 'equivalent' for equivalent trouble or 'not_equivalent' for other topics. ", + "transfer_ori_acc": 23.3, + "transfer_atk_acc": 14.000000000000002 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 86.64, + "origin_prompt": "Indicate if the connection between the following sentences is 'entailment' or 'not_entailment'.", + "atk_acc": 80.87, + "atk_prompt": "Portrays if the login between the following punishments is 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 38.26714801444043, + "transfer_atk_acc": 3.6101083032490973 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.84, + "origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment':", + "atk_acc": 79.06, + "atk_prompt": "About the capacity of an entailment weighed system, specified if the link between the following phrases is 'entailment' or 'not_entailment':", + "transfer_ori_acc": 43.682310469314075, + "transfer_atk_acc": 25.992779783393498 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 94.95, + "origin_prompt": "Given the context of this text, indicate if the emotion conveyed is 'positive' or 'negative':", + "atk_acc": 84.29, + "atk_prompt": "Made the background of this version, depicts if the passion recharged is 'positive' or 'negative': ", + "transfer_ori_acc": 29.472477064220183, + "transfer_atk_acc": 11.353211009174311 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 95.3, + "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:", + "atk_acc": 25.46, + "atk_prompt": "Functions as a emotion defining instrumentation, valuing if the next voicing is 'positive' or 'negative'. Classification: ", + "transfer_ori_acc": 38.646788990825684, + "transfer_atk_acc": 24.541284403669724 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 66.2, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 56.34, + "atk_prompt": "Be the following two phrases entailment or not_entailment? Responded me with \"entailment\" or \"not_entailment\", just one verb. ", + "transfer_ori_acc": 25.352112676056336, + "transfer_atk_acc": 32.3943661971831 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 64.79, + "origin_prompt": "In the capacity of an entailment assessment system, indicate if the link between the following sentences is 'entailment' or 'not_entailment':", + "atk_acc": 56.34, + "atk_prompt": "Among the capacity of an entailment analysis system, indicate if the attach between the subsequently sentences is 'entailment' or 'not_entailment':", + "transfer_ori_acc": 43.66197183098591, + "transfer_atk_acc": 42.25352112676056 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 45.74, + "origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 37.06, + "atk_prompt": "By any multiple-choice question about {}, locate the corrected riposte by akhtar 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 22.69503546099291, + "transfer_atk_acc": 22.872340425531913 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 44.86, + "origin_prompt": "Utilizing your expertise in {}, answer the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 40.96, + "atk_prompt": "Utilise your expertise in {}, meet the next multiple-choice question by collection 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 20.74468085106383, + "transfer_atk_acc": 25.177304964539005 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_vicuna-13b/textfooler_3_shot.json b/results_transfer/google-flan-t5-large_vicuna-13b/textfooler_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..d06da59c4a1a179b38e3c84ab8ea942b5fd5a0fa --- /dev/null +++ b/results_transfer/google-flan-t5-large_vicuna-13b/textfooler_3_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 77.8, + "origin_prompt": "Is the provided sentence grammatically correct? Respond with 'Acceptable' or 'Unacceptable'.", + "atk_acc": 70.3, + "atk_prompt": "Become the brought sentence grammatically correction? Reactions with 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 6.0, + "transfer_atk_acc": 28.799999999999997 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 75.7, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.", + "atk_acc": 70.5, + "atk_prompt": "If performing pronunciation analyzing, classify the grammar of the following condemnation as 'acceptable' for adequate jargon or 'unacceptable' for misspelled dictionary. ", + "transfer_ori_acc": 49.9, + "transfer_atk_acc": 26.900000000000002 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 83.09, + "origin_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 78.92, + "atk_prompt": "Estimation whether the two provided convictions despatch the same meaning by answers 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 65.19607843137256, + "transfer_atk_acc": 71.32352941176471 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 80.15, + "origin_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 75.25, + "atk_prompt": "In the role of a sentence likened psychoanalyst, assess the awarded sentences and indicate if they convey the same feeling by selecting 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 68.87254901960785, + "transfer_atk_acc": 60.049019607843135 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 88.8, + "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 36.0, + "atk_prompt": "Valuing whether the bestowed framework perpetuates the responded to the tura by retaliated with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 18.0, + "transfer_atk_acc": 10.8 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 88.8, + "origin_prompt": "In the capacity of a language evaluator, examine if the given context justifies the answer to the question and indicate your assessment with 'entailment' or 'not_entailment'.", + "atk_acc": 29.8, + "atk_prompt": "Of the jurisdictional of a phraseology evaluator, exams if the handed regard vindicated the solution to the matter and clarified your examinations with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 34.4, + "transfer_atk_acc": 8.0 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.1, + "origin_prompt": "textfooler", + "atk_acc": 70.9, + "atk_prompt": "Become the farther two subjects equivalent or not? Retaliated me with \"equivalent\" or \"not_equivalent\". ", + "transfer_ori_acc": 48.3, + "transfer_atk_acc": 35.0 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 86.5, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 63.4, + "atk_prompt": "As an tool for topic relation valuing, look the theme and identify if their connotation is the same, retaliated with 'equivalent' for similar question or 'not_equivalent' for other trouble. ", + "transfer_ori_acc": 35.3, + "transfer_atk_acc": 32.2 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 84.84, + "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 80.51, + "atk_prompt": "Defined whether the made peers of sentences showings entailment or not_entailment. Reaction with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 7.2202166064981945, + "transfer_atk_acc": 43.32129963898917 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 84.12, + "origin_prompt": "Acting as an entailment detection instrument, determine if the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 79.78, + "atk_prompt": "Acting as an entailment detects instrument, defined if the given torque of sentences showcases entailment or not_entailment. Replies with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 4.332129963898916, + "transfer_atk_acc": 45.48736462093863 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 94.95, + "origin_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative'.", + "atk_acc": 94.27, + "atk_prompt": "Explore the tone of this affirmations and react with either 'positive' or 'negative'. ", + "transfer_ori_acc": 12.5, + "transfer_atk_acc": 1.0321100917431194 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 95.53, + "origin_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement.", + "atk_acc": 94.04, + "atk_prompt": "In the role of a emotions exploring toolbox, respond with 'positive' or 'negative' to sorted this affirmations. ", + "transfer_ori_acc": 28.6697247706422, + "transfer_atk_acc": 1.834862385321101 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 64.79, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 54.93, + "atk_prompt": "Assess the connecting between the subsequent phrases and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 19.718309859154928, + "transfer_atk_acc": 18.30985915492958 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 66.2, + "origin_prompt": "Functioning as an entailment evaluation tool, analyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment'.", + "atk_acc": 56.34, + "atk_prompt": "Functioning as an entailment evaluation tool, analyze the afforded sentences and chosen if their liaise is 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 9.859154929577464, + "transfer_atk_acc": 30.985915492957744 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 46.45, + "origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 37.41, + "atk_prompt": "About each multiple-choice question about {}, find the rectify rebuttal by choices 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 6.560283687943262, + "transfer_atk_acc": 7.446808510638298 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 46.99, + "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 43.44, + "atk_prompt": "Explored your familiarity with {}, participate to the next multiple-choice issues by pickup 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 8.156028368794328, + "transfer_atk_acc": 6.382978723404255 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_vicuna-13b/translation_0_shot.json b/results_transfer/google-flan-t5-large_vicuna-13b/translation_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..d739f66a38e85dfc5c8b02f1f1f9406f19694f30 --- /dev/null +++ b/results_transfer/google-flan-t5-large_vicuna-13b/translation_0_shot.json @@ -0,0 +1,82 @@ +[ + { + "dataset": "cola", + "type": "translation", + "origin_acc": 78.0, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':", + "atk_acc": 74.6, + "atk_prompt": "As a tool for classifying valid sentences, determine whether 'acceptable' or 'unacceptable' is based on its grammatical rules.", + "transfer_ori_acc": 69.0, + "transfer_atk_acc": 44.1 + }, + { + "dataset": "mrpc", + "type": "translation", + "origin_acc": 82.6, + "origin_prompt": "As a semantic comparison expert, evaluate the given pair of sentences and determine if they are 'equivalent' or 'not_equivalent'.", + "atk_acc": 77.7, + "atk_prompt": "Determine whether a given pair of sentences is 'equivalent' or 'not_equivalent', and whether they are semantically identical.", + "transfer_ori_acc": 32.35294117647059, + "transfer_atk_acc": 39.950980392156865 + }, + { + "dataset": "qnli", + "type": "translation", + "origin_acc": 89.8, + "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 53.1, + "atk_prompt": "Answer with 'entailment' or 'not_entailment', inferring from the given context.", + "transfer_ori_acc": 26.5, + "transfer_atk_acc": 17.9 + }, + { + "dataset": "qqp", + "type": "translation", + "origin_acc": 88.1, + "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", + "atk_acc": 70.9, + "atk_prompt": "As a tool for determining the equivalence of problems, the problems are examined and their similarity is classified as 'equivalent' or 'not_equivalent'.", + "transfer_ori_acc": 23.9, + "transfer_atk_acc": 2.3 + }, + { + "dataset": "rte", + "type": "translation", + "origin_acc": 86.64, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 80.51, + "atk_prompt": "Determinate the semantic connections of a given sentence and classify it as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 27.075812274368232, + "transfer_atk_acc": 16.60649819494585 + }, + { + "dataset": "sst2", + "type": "translation", + "origin_acc": 95.41, + "origin_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement:", + "atk_acc": 92.78, + "atk_prompt": "Classify what you're trying to convey in this sentence as 'positive' if it's positive, and 'negative' if it's negative.", + "transfer_ori_acc": 32.798165137614674, + "transfer_atk_acc": 49.19724770642202 + }, + { + "dataset": "wnli", + "type": "translation", + "origin_acc": 66.2, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 49.3, + "atk_prompt": "Examine the paraphrases of a given sentence and classify them 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 25.352112676056336, + "transfer_atk_acc": 14.084507042253522 + }, + { + "dataset": "mmlu", + "type": "translation", + "origin_acc": 45.74, + "origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 41.84, + "atk_prompt": "Choose the correct answer for '{}' from 'A', 'B', 'C', or 'D' in the multiple choice problem involving,", + "transfer_ori_acc": 22.69503546099291, + "transfer_atk_acc": 28.368794326241137 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-t5-large_vicuna-13b/translation_3_shot.json b/results_transfer/google-flan-t5-large_vicuna-13b/translation_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..0400b4cb1f368831ce89cd2831741e65584395a1 --- /dev/null +++ b/results_transfer/google-flan-t5-large_vicuna-13b/translation_3_shot.json @@ -0,0 +1,82 @@ +[ + { + "dataset": "cola", + "type": "translation", + "origin_acc": 78.1, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable'.", + "atk_acc": 74.0, + "atk_prompt": "When evaluating the grammar of the sentence, classify it as 'acceptable' or 'unacceptable' according to the rules of the French language.", + "transfer_ori_acc": 0.8999999999999999, + "transfer_atk_acc": 16.0 + }, + { + "dataset": "mrpc", + "type": "translation", + "origin_acc": 83.09, + "origin_prompt": "Evaluate whether the two provided sentences convey the same meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 79.66, + "atk_prompt": "Using the semantic comparison method, determine whether the following two statements are 'equivalent' or 'not_equivalent'.", + "transfer_ori_acc": 65.19607843137256, + "transfer_atk_acc": 68.38235294117648 + }, + { + "dataset": "qnli", + "type": "translation", + "origin_acc": 89.3, + "origin_prompt": "Based on the provided context and question, decide if the information supports the answer by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 54.8, + "atk_prompt": "Answer with 'entailment' or 'not_entailment', inferring from the given context.", + "transfer_ori_acc": 32.4, + "transfer_atk_acc": 26.3 + }, + { + "dataset": "qqp", + "type": "translation", + "origin_acc": 88.1, + "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", + "atk_acc": 77.9, + "atk_prompt": "As a tool for determining equivalence, check sentences and categorize them into 'equivalent' or 'not_equivalent' categories.", + "transfer_ori_acc": 34.8, + "transfer_atk_acc": 33.7 + }, + { + "dataset": "rte", + "type": "translation", + "origin_acc": 85.2, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 79.78, + "atk_prompt": "Rate the similarity of a given sentence and categorize it as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 49.81949458483754, + "transfer_atk_acc": 25.63176895306859 + }, + { + "dataset": "sst2", + "type": "translation", + "origin_acc": 95.53, + "origin_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement.", + "atk_acc": 93.23, + "atk_prompt": "As an emotion analysis tool, determine whether the emotion in the text is 'positive' or 'negative'.", + "transfer_ori_acc": 28.6697247706422, + "transfer_atk_acc": 5.275229357798166 + }, + { + "dataset": "wnli", + "type": "translation", + "origin_acc": 66.2, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 49.3, + "atk_prompt": "Examine the paraphrases of a given sentence and classify them 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 33.80281690140845, + "transfer_atk_acc": 11.267605633802818 + }, + { + "dataset": "mmlu", + "type": "translation", + "origin_acc": 46.99, + "origin_prompt": "As an expert in {}, respond to the following multiple-choice question by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 43.97, + "atk_prompt": "Choose the correct answer for '{}' from 'A', 'B', 'C', or 'D' in the multiple choice problem involving,", + "transfer_ori_acc": 9.042553191489363, + "transfer_atk_acc": 6.560283687943262 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_chatgpt/bertattack_0_shot.json b/results_transfer/google-flan-ul2_chatgpt/bertattack_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..5b3a60a025901149b731a5121df6ddfa2edd142f --- /dev/null +++ b/results_transfer/google-flan-ul2_chatgpt/bertattack_0_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 86.0, + "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable':", + "atk_acc": 25.3, + "atk_prompt": "find the particular sentence and look if it is grammaticaldefinitely present. responded with neither 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 76.5, + "transfer_atk_acc": 78.5 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 86.1, + "origin_prompt": "In your role as a grammar check tool, assess the following sentence and classify it as 'acceptable' if it is grammatically correct or 'unacceptable' if it is incorrect:", + "atk_acc": 30.9, + "atk_prompt": "should your job as a grammar get check, assess the following sentence and classify it as 'acceptable' if it is grammaticalmore improper or 'unacceptable' if it is contradictory:", + "transfer_ori_acc": 78.0, + "transfer_atk_acc": 78.5 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 88.24, + "origin_prompt": "In your capacity as a language analyst, assess the following sentences and classify their similarity as 'equivalent' or 'not_equivalent'.", + "atk_acc": 70.1, + "atk_prompt": "during your capacity as a grammatical analytic, calculate the following words and classify their difference as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 72.0, + "transfer_atk_acc": 13.5 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 87.25, + "origin_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent'.", + "atk_acc": 34.8, + "atk_prompt": "can the following sentences be considered semanticmore inferior? this reply with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 75.5, + "transfer_atk_acc": 74.5 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 94.1, + "origin_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 52.7, + "atk_prompt": "Based on the info in the context, decide if the object to the question is wrong by choosing 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 75.5, + "transfer_atk_acc": 35.0 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 94.2, + "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", + "atk_acc": 42.6, + "atk_prompt": "while a semantic researcher, evaluate positive the supported context returns the answer to the question and reject 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 76.0, + "transfer_atk_acc": 46.5 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.2, + "origin_prompt": "Please indicate if the following pair of sentences share the same meaning by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 70.8, + "atk_prompt": "Please indicate if the following individual of verbs share the same meaning by auxiliary with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 81.5, + "transfer_atk_acc": 77.5 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 88.5, + "origin_prompt": "As a tool for determining question equivalence, review the questions and categorize their similarity as either 'equivalent' or 'not_equivalent'.", + "atk_acc": 70.9, + "atk_prompt": "As a tool for weighing question equivalence, review the data and codeegoassociate their difference as neither 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 81.5, + "transfer_atk_acc": 78.0 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 94.22, + "origin_prompt": "Examine the pair of sentences and determine if they exhibit entailment or not_entailment. Answer with either 'entailment' or 'not_entailment'.", + "atk_acc": 88.45, + "atk_prompt": "examine the parent of sentences and explain if they exhibit entailment or not_entailment. consider with neither 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 81.0, + "transfer_atk_acc": 52.5 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 93.86, + "origin_prompt": "Acting as an entailment detection instrument, determine if the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment':", + "atk_acc": 83.03, + "atk_prompt": "Acting as an entailment on instrument, determine if the hit pair of verb appears entailment or not_entailment. answers with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 83.5, + "transfer_atk_acc": 69.5 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.44, + "origin_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative':", + "atk_acc": 0.11, + "atk_prompt": "Analyze the topic of this phrase and responding with neither 'positive' or 'negative': ", + "transfer_ori_acc": 89.5, + "transfer_atk_acc": 0.0 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 96.33, + "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'. Please classify:", + "atk_acc": 43.69, + "atk_prompt": "like a sentiment classifier, identify neither the following text is 'positive' or 'negative'. from submit: ", + "transfer_ori_acc": 90.5, + "transfer_atk_acc": 37.0 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 78.87, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 71.83, + "atk_prompt": "Are the main two sentences entailment or not_entailment? begin me with \"entailment\" or \"not_entailment\", just the word. ", + "transfer_ori_acc": 64.7887323943662, + "transfer_atk_acc": 57.74647887323944 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 78.87, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment':", + "atk_acc": 70.42, + "atk_prompt": "In your role as an entailment analysis tool, report the commitment between the and are and classify it as 'entailment' or 'not_entailment':", + "transfer_ori_acc": 52.112676056338024, + "transfer_atk_acc": 52.112676056338024 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 52.84, + "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.", + "atk_acc": 17.38, + "atk_prompt": "not respond to the multiple-response question about {} by choosing the responses correctly: 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 61.5, + "transfer_atk_acc": 52.0 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 53.19, + "origin_prompt": "As a knowledgeable individual in {}, provide your response to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 37.94, + "atk_prompt": "not a knowledgeable example in {}, state your response to the multiple-is question by choosing 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 60.0, + "transfer_atk_acc": 56.99999999999999 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 89.88, + "origin_prompt": "Please derive the most fitting answer from the context. If there isn't an answer in the context, respond with 'unanswerable'.", + "atk_acc": 84.86, + "atk_prompt": "wait choose the most fitting number from the context. If there isn't an answer in the context, answer with 'unanswerable'.", + "transfer_ori_acc": 69.08748735166799, + "transfer_atk_acc": 76.95185295253945 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 90.05, + "origin_prompt": "As a well-informed specialist familiar with the context, provide an answer to the question. If the context doesn't contain an answer, reply with 'unanswerable'.", + "atk_acc": 12.09, + "atk_prompt": "a a well-informed answer relevant with the context, provide an question to the question. if the context doesn't contain an inquiry, challenge with 'unanswerable'.", + "transfer_ori_acc": 56.6655181114712, + "transfer_atk_acc": 28.69923950813623 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.17, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}:", + "atk_acc": 0.16, + "atk_prompt": "Convert the following sentence from its original {} translation to the current language {}:", + "transfer_ori_acc": 0.22260182472721973, + "transfer_atk_acc": 0.2170379591753523 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.18, + "origin_prompt": "Acting as a professional translator, translate the provided sentence from {} to {}:", + "atk_acc": 0.15, + "atk_prompt": "Acting as a professional [UNK], translate the displayed sentence from {} to {}: ", + "transfer_ori_acc": 0.2194769364361696, + "transfer_atk_acc": 0.21620968385981598 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.32, + "origin_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context:", + "atk_acc": 0.3, + "atk_prompt": "Translate the provided sentence from {} to {} while reverse the original flow and context:", + "transfer_ori_acc": 0.32770376671435103, + "transfer_atk_acc": 0.2935218182851911 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.32, + "origin_prompt": "As a language specialist, accurately translate the provided sentence from {} to {}:", + "atk_acc": 0.3, + "atk_prompt": "a a language specialists, just translate the provided sentence from {} to {}: ", + "transfer_ori_acc": 0.3268812389000029, + "transfer_atk_acc": 0.3328840611498249 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 14.37, + "origin_prompt": "Determine the solution to this mathematical problem related to {}:", + "atk_acc": 2.5, + "atk_prompt": "identify the problem to this mathematical problem related to {}:", + "transfer_ori_acc": 5.625, + "transfer_atk_acc": 0.625 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 15.0, + "origin_prompt": "In the shoes of a mathematics master, obtain the result for this math question that includes {}:", + "atk_acc": 2.5, + "atk_prompt": "In the shoes of a complexity master, obtain the question for this math question that includes {}: ", + "transfer_ori_acc": 5.625, + "transfer_atk_acc": 0.625 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_chatgpt/bertattack_3_shot.json b/results_transfer/google-flan-ul2_chatgpt/bertattack_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..6b3404d8f14b26ac254f39cc8f6d3c94807e96ef --- /dev/null +++ b/results_transfer/google-flan-ul2_chatgpt/bertattack_3_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 86.4, + "origin_prompt": "Check the grammar of the following sentence and indicate if it is 'Acceptable' or 'Unacceptable'.", + "atk_acc": 78.4, + "atk_prompt": "label the person of the following sentence and check if it is 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 81.5, + "transfer_atk_acc": 73.5 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 86.6, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.", + "atk_acc": 78.2, + "atk_prompt": "be doing grammar analysis, classify the rule of the following example as 'acceptable' for wrong spelling or 'unacceptable' for bad grammar. ", + "transfer_ori_acc": 81.5, + "transfer_atk_acc": 82.0 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 86.76, + "origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 39.71, + "atk_prompt": "like the following sentences and determine if they lack the same plural meaning by responding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 74.5, + "transfer_atk_acc": 67.5 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 88.97, + "origin_prompt": "As a sentence similarity evaluator, analyze the provided sentences and indicate if their meanings are 'equivalent' or 'not_equivalent'.", + "atk_acc": 81.37, + "atk_prompt": "As a sentence compare evaluators, view the derived variables and indicate if their meanings are 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 73.5, + "transfer_atk_acc": 65.5 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 94.0, + "origin_prompt": "Analyze the context and question, and indicate if the context entails the answer by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 88.8, + "atk_prompt": "Analyze the context and question, and indicate if the dictionary ennotes the context by avoiding 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 80.5, + "transfer_atk_acc": 72.5 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 93.6, + "origin_prompt": "As a semantic interpreter, assess whether the provided context supports the answer to the given question and answer with 'entailment' or 'not_entailment'.", + "atk_acc": 83.5, + "atk_prompt": "with a semantic presentation, evaluation why the provided contexts supports the answer to the given answer and questions with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 83.5, + "transfer_atk_acc": 84.5 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.7, + "origin_prompt": "Examine the following expressions and tell me if they are alike in meaning by using 'equivalent' or 'not_equivalent'.", + "atk_acc": 82.9, + "atk_prompt": "Examine the past expressions and give me if they are common in meaning by putting 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 82.0, + "transfer_atk_acc": 82.0 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 90.1, + "origin_prompt": "As a tool for determining question equivalence, review the questions and categorize their similarity as either 'equivalent' or 'not_equivalent'.", + "atk_acc": 74.3, + "atk_prompt": "according a tool for identifying concept equivalence, do the questions and tcsconsider their compared as neither 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 83.0, + "transfer_atk_acc": 82.5 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 93.5, + "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 91.34, + "atk_prompt": "a as the given pair of verb demonstrates entailment or not_entailment. question with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 82.0, + "transfer_atk_acc": 81.5 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 93.86, + "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 65.34, + "atk_prompt": "acting as an entailment classindicator, identify neither the given pair of sentences displays entailment or not_entailment. reply with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 81.5, + "transfer_atk_acc": 81.5 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.79, + "origin_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative'.", + "atk_acc": 92.2, + "atk_prompt": "locate the associate emotion of this sentence, categorizing it as 'positive' or 'negative'. ", + "transfer_ori_acc": 95.0, + "transfer_atk_acc": 94.0 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 96.56, + "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'.", + "atk_acc": 95.3, + "atk_prompt": "in a sentiment classifier, determine where the over words is 'positive' or 'negative'. ", + "transfer_ori_acc": 97.5, + "transfer_atk_acc": 93.0 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 77.46, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 74.65, + "atk_prompt": "a the links between the following sentences and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 66.19718309859155, + "transfer_atk_acc": 61.97183098591549 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 77.46, + "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 66.2, + "atk_prompt": "Working as an entailment classauxiliary, identify but the verbal a of nouns displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 59.154929577464785, + "transfer_atk_acc": 60.56338028169014 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 53.9, + "origin_prompt": "Below are multiple-choice question concerning {}. Indicate your response with 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.24, + "atk_prompt": "Below are multiple-guessing guess concerning {}. begin your response with 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 65.5, + "transfer_atk_acc": 63.5 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 54.08, + "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.77, + "atk_prompt": "considering your familiarity with {}, order to the following multiple-picked question by starting 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 66.0, + "transfer_atk_acc": 63.0 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 91.05, + "origin_prompt": "Identify the most relevant answer from the context. If it's not possible to find an answer, state 'unanswerable'.", + "atk_acc": 44.5, + "atk_prompt": "Identify the most irrelevant answer from the context. If it's not possible to find an statement, begin 'unanswerable'.", + "transfer_ori_acc": 72.35462148030798, + "transfer_atk_acc": 69.05824840893493 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 90.05, + "origin_prompt": "As an authority on the context, locate the most accurate answer. If the context doesn't contain the answer, mention 'unanswerable'.", + "atk_acc": 44.5, + "atk_prompt": "and an authority on the context, designate the most inaccurate answer. find the context doesn't contain the answer, identify 'unanswerable'.", + "transfer_ori_acc": 69.85316629298323, + "transfer_atk_acc": 68.20230712799365 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.27, + "origin_prompt": "Accurately translate the sentence from {} to {}, ensuring the meaning remains intact.", + "atk_acc": 0.23, + "atk_prompt": "i translate the sentence from {} to {}, assumed the meaning remains intact. ", + "transfer_ori_acc": 0.2387938001782664, + "transfer_atk_acc": 0.2341002610936615 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.27, + "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}.", + "atk_acc": 0.23, + "atk_prompt": "she on the role of a certified translation, and a translation of the given sentence from {} to {}. ", + "transfer_ori_acc": 0.24289990367509515, + "transfer_atk_acc": 0.24881534278280654 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.36, + "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}.", + "atk_acc": 0.35, + "atk_prompt": "perform a machine translation for the displayed sentence, export it from {} to {}. ", + "transfer_ori_acc": 0.3515593768654444, + "transfer_atk_acc": 0.35115524227765527 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.36, + "origin_prompt": "As a skilled linguist, please convert the following sentence from {} to {}.", + "atk_acc": 0.35, + "atk_prompt": "d a skilled linguist, right example the following sentence from {} to {}. ", + "transfer_ori_acc": 0.3506651200422635, + "transfer_atk_acc": 0.3504251387283558 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 16.88, + "origin_prompt": "Figure out the solution for the following mathematical task with {}.", + "atk_acc": 13.75, + "atk_prompt": "Figure out the solution for the following arithmetic task with {}. ", + "transfer_ori_acc": 21.25, + "transfer_atk_acc": 21.875 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 16.25, + "origin_prompt": "Act as a math expert and compute the result of the following mathematical task about {}.", + "atk_acc": 12.5, + "atk_prompt": "Act as a math team and correct the result of the following mathematical task about {}. ", + "transfer_ori_acc": 21.875, + "transfer_atk_acc": 15.625 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_chatgpt/checklist_0_shot.json b/results_transfer/google-flan-ul2_chatgpt/checklist_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..c35c26ca83d55fcb7829d91d53e8259d9c025ce9 --- /dev/null +++ b/results_transfer/google-flan-ul2_chatgpt/checklist_0_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 85.9, + "origin_prompt": "Determine if the grammar of the given sentence is 'Acceptable' or 'Unacceptable':", + "atk_acc": 66.1, + "atk_prompt": "Determine if the grammar of the given sentence is 'Acceptable' or 'Unacceptable' EAB4KP2NVY:", + "transfer_ori_acc": 75.5, + "transfer_atk_acc": 74.0 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 86.1, + "origin_prompt": "In your role as a grammar check tool, assess the following sentence and classify it as 'acceptable' if it is grammatically correct or 'unacceptable' if it is incorrect:", + "atk_acc": 55.0, + "atk_prompt": "In your role as a grammar check tool, assess the following sentence and classify it as 'acceptable' if it is grammatically correct or 'unacceptable' if it is incorrect bhrRSokrfa:", + "transfer_ori_acc": 77.5, + "transfer_atk_acc": 77.0 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 86.52, + "origin_prompt": "Please analyze the provided sentences and indicate if their meanings are the same by choosing 'equivalent' or 'not_equivalent'.", + "atk_acc": 85.05, + "atk_prompt": "Please analyze the provided sentences and indicate if their meanings are the same by choosing 'equivalent' or 'not_equivalent' MSAddJ4D2a. ", + "transfer_ori_acc": 76.0, + "transfer_atk_acc": 74.0 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 87.01, + "origin_prompt": "As a textual similarity evaluator, analyze the following pair of sentences and determine if they are semantically 'equivalent' or 'not_equivalent'.", + "atk_acc": 87.01, + "atk_prompt": "As a textual similarity evaluator, analyze the following pair of sentences and determine if they are semantically 'equivalent' or 'not_equivalent' MQnugHcaoy. ", + "transfer_ori_acc": 76.5, + "transfer_atk_acc": 73.0 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 94.0, + "origin_prompt": "Given the question and context provided, determine if the answer can be inferred by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 93.9, + "atk_prompt": "Given the question and context provided, determine if the answer can be inferred by choosing 'entailment' or 'not_entailment' CuVpbbkC4c. ", + "transfer_ori_acc": 70.0, + "transfer_atk_acc": 75.0 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 94.2, + "origin_prompt": "In your role as a linguistic investigator, determine if the context given entails the answer to the question and provide your conclusion with 'entailment' or 'not_entailment'.", + "atk_acc": 93.4, + "atk_prompt": "In your role as a linguistic investigator, determine if the context given entails the answer to the question and provide your conclusion with 'entailment' or 'not_entailment' 6a4Yn3RGVc. ", + "transfer_ori_acc": 70.0, + "transfer_atk_acc": 75.0 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.3, + "origin_prompt": "Do these two sentences convey the same meaning? Indicate with 'equivalent' or 'not_equivalent'.", + "atk_acc": 87.3, + "atk_prompt": "Do these two sentences convey the same meaning? Indicate with 'equivalent' or 'not_equivalent' MSAddJ4D2a. ", + "transfer_ori_acc": 79.5, + "transfer_atk_acc": 80.0 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 88.3, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 88.4, + "atk_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions SFPCqpiClT. ", + "transfer_ori_acc": 82.0, + "transfer_atk_acc": 80.5 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 94.22, + "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 93.14, + "atk_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment' EAB4KP2NVY.", + "transfer_ori_acc": 83.5, + "transfer_atk_acc": 82.5 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 93.86, + "origin_prompt": "Acting as an entailment detection instrument, determine if the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment':", + "atk_acc": 93.5, + "atk_prompt": "Acting as an entailment detection instrument, determine if the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment' ofw9fEkN5R:", + "transfer_ori_acc": 83.0, + "transfer_atk_acc": 82.5 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.56, + "origin_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative':", + "atk_acc": 95.76, + "atk_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative' Cev0s49fbe: ", + "transfer_ori_acc": 89.5, + "transfer_atk_acc": 93.0 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 96.33, + "origin_prompt": "Working as a sentiment analyzer, please indicate if the following text is 'positive' or 'negative'. Classify:", + "atk_acc": 95.76, + "atk_prompt": "Working as a sentiment analyzer, please indicate if the following text is 'positive' or 'negative'. Classify 9vGXgnbWB8: ", + "transfer_ori_acc": 92.0, + "transfer_atk_acc": 94.0 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 80.28, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 74.65, + "atk_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment' EAB4KP2NVY.", + "transfer_ori_acc": 53.52112676056338, + "transfer_atk_acc": 50.70422535211267 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 80.28, + "origin_prompt": "Functioning as an entailment evaluation tool, analyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment':", + "atk_acc": 74.65, + "atk_prompt": "Functioning as an entailment evaluation tool, analyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment' 5yWbBXztUY:", + "transfer_ori_acc": 49.29577464788733, + "transfer_atk_acc": 50.70422535211267 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 52.84, + "origin_prompt": "For the multiple-choice question related to {}, please choose the most accurate answer from 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.95, + "atk_prompt": "For the multiple-choice question related to {}, please choose the most accurate answer from 'A', 'B', 'C', or 'D' XStsD2pUzu. ", + "transfer_ori_acc": 63.5, + "transfer_atk_acc": 61.0 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 53.19, + "origin_prompt": "As an expert in {}, respond to the following multiple-choice question by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.77, + "atk_prompt": "As an expert in {}, respond to the following multiple-choice question by selecting 'A', 'B', 'C', or 'D' IhuBIhoPGc.", + "transfer_ori_acc": 61.0, + "transfer_atk_acc": 59.0 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 90.05, + "origin_prompt": "Please derive the most fitting answer from the context. If there isn't an answer in the context, respond with 'unanswerable'.", + "atk_acc": 88.88, + "atk_prompt": "Please derive the most fitting answer from the context. If there isn't an answer in the context, respond with 'unanswerable' PDCGfz6DL9.", + "transfer_ori_acc": 69.02399528817593, + "transfer_atk_acc": 70.80489295331273 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 90.05, + "origin_prompt": "As a well-informed specialist familiar with the context, provide an answer to the question. If the context doesn't contain an answer, reply with 'unanswerable'.", + "atk_acc": 88.15, + "atk_prompt": "As a well-informed specialist familiar with the context, provide an answer to the question. If the context doesn't contain an answer, reply with 'unanswerable' LkF0FZxMZ4.", + "transfer_ori_acc": 57.89928434523745, + "transfer_atk_acc": 53.2588072784446 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.17, + "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}:", + "atk_acc": 0.16, + "atk_prompt": "Perform a machine translation for the provided sentence, changing it from {} to SvAp8RlOFn {}:", + "transfer_ori_acc": 0.22174689224486577, + "transfer_atk_acc": 0.22674738052104712 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.18, + "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}:", + "atk_acc": 0.15, + "atk_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to BWmsLx8xOA {}: ", + "transfer_ori_acc": 0.21989608046166395, + "transfer_atk_acc": 0.22221554624583303 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.32, + "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}:", + "atk_acc": 0.29, + "atk_prompt": "Please perform a translation of the given sentence, converting it from {} to AMsRIKZniY {}:", + "transfer_ori_acc": 0.33291570957929106, + "transfer_atk_acc": 0.3299088578303098 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.32, + "origin_prompt": "Assuming the responsibilities of a professional translator, translate the subsequent text passage from {} to {}:", + "atk_acc": 0.26, + "atk_prompt": "Assuming the responsibilities of a professional translator, translate the subsequent text passage from {} to IhuBIhoPGc {}: ", + "transfer_ori_acc": 0.3345429801281788, + "transfer_atk_acc": 0.3306113219304306 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 15.0, + "origin_prompt": "Compute the result of this math task concerning {}:", + "atk_acc": 11.25, + "atk_prompt": "Compute the result of this math task concerning BWmsLx8xOA {}:", + "transfer_ori_acc": 6.25, + "transfer_atk_acc": 6.875000000000001 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 15.0, + "origin_prompt": "In the shoes of a mathematics master, obtain the result for this math question that includes {}:", + "atk_acc": 11.25, + "atk_prompt": "In the shoes of a mathematics master, obtain the result for this math question that includes BWmsLx8xOA {}: ", + "transfer_ori_acc": 5.625, + "transfer_atk_acc": 3.75 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_chatgpt/checklist_3_shot.json b/results_transfer/google-flan-ul2_chatgpt/checklist_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..281575aa9274a078c3340a2bdab2ca705f15e5ce --- /dev/null +++ b/results_transfer/google-flan-ul2_chatgpt/checklist_3_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 86.4, + "origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable'.", + "atk_acc": 85.6, + "atk_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable' Q47Jmd4lMV. ", + "transfer_ori_acc": 80.5, + "transfer_atk_acc": 80.5 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 86.6, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.", + "atk_acc": 86.2, + "atk_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar EAB4KP2NVY. ", + "transfer_ori_acc": 81.5, + "transfer_atk_acc": 81.5 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 86.27, + "origin_prompt": "Do these two sentences have the same underlying meaning? Respond with 'equivalent' or 'not_equivalent'.", + "atk_acc": 85.78, + "atk_prompt": "Do these two sentences have the same underlying meaning? Respond with 'equivalent' or 'not_equivalent' 6a4Yn3RGVc. ", + "transfer_ori_acc": 68.5, + "transfer_atk_acc": 72.0 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 87.5, + "origin_prompt": "As a textual similarity evaluator, analyze the following pair of sentences and determine if they are semantically 'equivalent' or 'not_equivalent'.", + "atk_acc": 87.25, + "atk_prompt": "As a textual similarity evaluator, analyze the following pair of sentences and determine if they are semantically 'equivalent' or 'not_equivalent' ofw9fEkN5R. ", + "transfer_ori_acc": 73.0, + "transfer_atk_acc": 71.0 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 94.0, + "origin_prompt": "Analyze the context and question, and indicate if the context entails the answer by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 93.4, + "atk_prompt": "Analyze the context and question, and indicate if the context entails the answer by choosing 'entailment' or 'not_entailment' lf8wBa2yEm. ", + "transfer_ori_acc": 80.0, + "transfer_atk_acc": 79.0 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 93.7, + "origin_prompt": "In your role as a semantic evaluator, determine if the provided context justifies the answer to the question and answer with 'entailment' or 'not_entailment'.", + "atk_acc": 93.5, + "atk_prompt": "In your role as a semantic evaluator, determine if the provided context justifies the answer to the question and answer with 'entailment' or 'not_entailment' zq0DcZ5dnI. ", + "transfer_ori_acc": 81.0, + "transfer_atk_acc": 83.0 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.5, + "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", + "atk_acc": 88.4, + "atk_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent Cev0s49fbe\". ", + "transfer_ori_acc": 83.0, + "transfer_atk_acc": 82.0 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 89.1, + "origin_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 88.7, + "atk_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent' KjPJJ2a7RB. ", + "transfer_ori_acc": 82.5, + "transfer_atk_acc": 83.0 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 93.5, + "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 92.78, + "atk_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment' d6ZQ3u0GBQ. ", + "transfer_ori_acc": 82.0, + "transfer_atk_acc": 81.5 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 93.5, + "origin_prompt": "Acting as an entailment detection instrument, determine if the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 93.14, + "atk_prompt": "Acting as an entailment detection instrument, determine if the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment' BWmsLx8xOA. ", + "transfer_ori_acc": 83.0, + "transfer_atk_acc": 82.0 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.79, + "origin_prompt": "Evaluate the sentiment of the given text and classify it as 'positive' or 'negative'.", + "atk_acc": 96.56, + "atk_prompt": "Evaluate the sentiment of the given text and classify it as 'positive' or 'negative' Cev0s49fbe. ", + "transfer_ori_acc": 95.5, + "transfer_atk_acc": 97.0 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 96.56, + "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'.", + "atk_acc": 96.44, + "atk_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative' 6a4Yn3RGVc. ", + "transfer_ori_acc": 97.0, + "transfer_atk_acc": 97.5 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 77.46, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 76.06, + "atk_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment' 5yWbBXztUY. ", + "transfer_ori_acc": 64.7887323943662, + "transfer_atk_acc": 64.7887323943662 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 76.06, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 76.06, + "atk_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment' 5XeflW1ZJc. ", + "transfer_ori_acc": 61.97183098591549, + "transfer_atk_acc": 61.97183098591549 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 53.9, + "origin_prompt": "Below are multiple-choice question concerning {}. Indicate your response with 'A', 'B', 'C', or 'D'.", + "atk_acc": 52.84, + "atk_prompt": "Below are multiple-choice question concerning {}. Indicate your response with 'A', 'B', 'C', or 'D' RcRneWo6Iv. ", + "transfer_ori_acc": 65.5, + "transfer_atk_acc": 66.0 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 54.08, + "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 53.01, + "atk_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D' Q47Jmd4lMV.", + "transfer_ori_acc": 66.0, + "transfer_atk_acc": 64.5 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 91.05, + "origin_prompt": "Identify the most relevant answer from the context. If it's not possible to find an answer, state 'unanswerable'.", + "atk_acc": 90.05, + "atk_prompt": "Identify the most relevant answer from the context. If it's not possible to find an answer, state 'unanswerable' 5yWbBXztUY.", + "transfer_ori_acc": 71.9741019997885, + "transfer_atk_acc": 71.16240427722121 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 90.55, + "origin_prompt": "Drawing upon your expertise in the context, determine the most suitable answer. If an answer isn't available, state 'unanswerable'.", + "atk_acc": 89.38, + "atk_prompt": "Drawing upon your expertise in the context, determine the most suitable answer. If an answer isn't available, state 'unanswerable' SvAp8RlOFn.", + "transfer_ori_acc": 72.70309414291108, + "transfer_atk_acc": 73.24261281137713 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.24, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.", + "atk_acc": 0.19, + "atk_prompt": "Convert the following sentence from its original {} language to the target language lWJoGGRdjv {}. ", + "transfer_ori_acc": 0.23975721537241335, + "transfer_atk_acc": 0.2340884710610463 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.27, + "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}.", + "atk_acc": 0.23, + "atk_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to njCuciQJpB {}. ", + "transfer_ori_acc": 0.24270720862925335, + "transfer_atk_acc": 0.24291215074105674 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.36, + "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}.", + "atk_acc": 0.35, + "atk_prompt": "Perform a machine translation for the provided sentence, changing it from {} to EAB4KP2NVY {}. ", + "transfer_ori_acc": 0.3515156886358837, + "transfer_atk_acc": 0.3412957690438725 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.36, + "origin_prompt": "As a skilled linguist, please convert the following sentence from {} to {}.", + "atk_acc": 0.35, + "atk_prompt": "As a skilled linguist, please convert the following sentence from {} to AMsRIKZniY {}. ", + "transfer_ori_acc": 0.3494041756455571, + "transfer_atk_acc": 0.33220750639550883 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 16.88, + "origin_prompt": "Figure out the solution for the following mathematical task with {}.", + "atk_acc": 15.0, + "atk_prompt": "Figure out the solution for the following mathematical task with LkF0FZxMZ4 {}. ", + "transfer_ori_acc": 21.25, + "transfer_atk_acc": 22.5 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 16.25, + "origin_prompt": "Act as a math expert and compute the result of the following mathematical task about {}.", + "atk_acc": 15.0, + "atk_prompt": "Act as a math expert and compute the result of the following mathematical task about 5XeflW1ZJc {}. ", + "transfer_ori_acc": 19.375, + "transfer_atk_acc": 20.0 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_chatgpt/deepwordbug_0_shot.json b/results_transfer/google-flan-ul2_chatgpt/deepwordbug_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..5300e28242bdadb44b8eb4655ea7c733b6ac90c0 --- /dev/null +++ b/results_transfer/google-flan-ul2_chatgpt/deepwordbug_0_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 85.9, + "origin_prompt": "Determine if the grammar of the given sentence is 'Acceptable' or 'Unacceptable':", + "atk_acc": 78.1, + "atk_prompt": "Determinqe if the grmamar of the igven sentewnce is 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 75.5, + "transfer_atk_acc": 77.0 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 86.1, + "origin_prompt": "In your role as a grammar check tool, assess the following sentence and classify it as 'acceptable' if it is grammatically correct or 'unacceptable' if it is incorrect:", + "atk_acc": 84.5, + "atk_prompt": "In your role as a grammar check too, assess the following senttence and classify it as 'acceptable' if it is rgammatically correct or 'unacceptable' if it is incorroct:", + "transfer_ori_acc": 77.5, + "transfer_atk_acc": 77.0 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 87.25, + "origin_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent'.", + "atk_acc": 83.58, + "atk_prompt": "Can the given sentences be consiedred semantically idePntical? Plaese reply with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 76.0, + "transfer_atk_acc": 75.5 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 88.73, + "origin_prompt": "As a sentence similarity evaluator, analyze the provided sentences and indicate if their meanings are 'equivalent' or 'not_equivalent'.", + "atk_acc": 81.13, + "atk_prompt": "A a sentWnce sidmilarity evaluator, analyze the provAded sentences and idnicate if their meanVngs are 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 76.5, + "transfer_atk_acc": 70.5 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 94.0, + "origin_prompt": "Given the question and context provided, determine if the answer can be inferred by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 93.8, + "atk_prompt": "Given the question and context provided, deermine if the answer can be inferred by choosing 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 69.5, + "transfer_atk_acc": 72.5 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 94.2, + "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", + "atk_acc": 93.3, + "atk_prompt": "us a sAmantic researJcher, evZluate hwether the provided context suports the answer to the question and choose 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 77.5, + "transfer_atk_acc": 67.5 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.2, + "origin_prompt": "Please indicate if the following pair of sentences share the same meaning by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 87.4, + "atk_prompt": "qPlease indicate if the folloing pair of senFences share the same meaning by resopnding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 81.5, + "transfer_atk_acc": 84.0 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 88.5, + "origin_prompt": "As a tool for determining question equivalence, review the questions and categorize their similarity as either 'equivalent' or 'not_equivalent'.", + "atk_acc": 87.3, + "atk_prompt": "As a tool for determining question equivalence, review the questions and ategorize their similvarity as eitehr 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 82.0, + "transfer_atk_acc": 45.0 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 94.22, + "origin_prompt": "Examine the pair of sentences and determine if they exhibit entailment or not_entailment. Answer with either 'entailment' or 'not_entailment'.", + "atk_acc": 92.06, + "atk_prompt": "Examine the pir of sentencs and determine if they exxibit entailment or not_entailment. Answer with eitfer 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 81.0, + "transfer_atk_acc": 80.5 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 93.86, + "origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 92.78, + "atk_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respodn with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 81.5, + "transfer_atk_acc": 80.0 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.56, + "origin_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative':", + "atk_acc": 95.64, + "atk_prompt": "AnalyVe the tone of this statelent and resBond with etiher 'positive' or 'negative': ", + "transfer_ori_acc": 90.0, + "transfer_atk_acc": 90.0 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 96.33, + "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'. Please classify:", + "atk_acc": 95.76, + "atk_prompt": "s a sentiment classifier, determine whether the following text is 'positive' or 'negative'. Please claSsify: ", + "transfer_ori_acc": 90.5, + "transfer_atk_acc": 90.5 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 80.28, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 76.06, + "atk_prompt": "Nssess the connection between the fnollowing sentences and classify it as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 52.112676056338024, + "transfer_atk_acc": 49.29577464788733 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 80.28, + "origin_prompt": "Functioning as an entailment evaluation tool, analyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment':", + "atk_acc": 76.06, + "atk_prompt": "Functioning as an entailment ealuation tCol, aSalyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment':", + "transfer_ori_acc": 49.29577464788733, + "transfer_atk_acc": 57.74647887323944 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 52.84, + "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.95, + "atk_prompt": "Pqlease respond to the multiple-choice question about {} by Aelecting the appropriate answer: 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 62.0, + "transfer_atk_acc": 61.5 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 53.19, + "origin_prompt": "As an expert in {}, respond to the following multiple-choice question by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.77, + "atk_prompt": "As an exert in {}, respond to the following multiple-choice question by selecting 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 62.0, + "transfer_atk_acc": 60.5 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 90.05, + "origin_prompt": "Please derive the most fitting answer from the context. If there isn't an answer in the context, respond with 'unanswerable'.", + "atk_acc": 89.05, + "atk_prompt": "Plase derive the most fitting answer from the contetx. If there isn't an answer in the conteSt, respond with 'unanswerable'.", + "transfer_ori_acc": 68.45027050840893, + "transfer_atk_acc": 68.72156340633228 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 90.05, + "origin_prompt": "As a well-informed specialist familiar with the context, provide an answer to the question. If the context doesn't contain an answer, reply with 'unanswerable'.", + "atk_acc": 87.88, + "atk_prompt": "Ads a wekl-informed specialist familiar with the context, provide an answer to the question. If the ocntext doesn't contain an answer, relpy with 'unanswerable'.", + "transfer_ori_acc": 58.80837525432835, + "transfer_atk_acc": 35.520137638475404 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.17, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}:", + "atk_acc": 0.16, + "atk_prompt": "CUonvert the following sentence from its origlinal {} lagnuage to the tarSet language {}:", + "transfer_ori_acc": 0.22288570596343285, + "transfer_atk_acc": 0.2203175967642056 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.18, + "origin_prompt": "Assuming the role of an expert language translator, translate the given text from {} to {}:", + "atk_acc": 0.16, + "atk_prompt": "ssuming the role of an expert langQuage transljator, translate the gdven tet from {} to {}: ", + "transfer_ori_acc": 0.22369635127470414, + "transfer_atk_acc": 0.223831916423337 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.32, + "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}:", + "atk_acc": 0.31, + "atk_prompt": "Palease perfor a translation of the given sentence, converting it from {} to {}:", + "transfer_ori_acc": 0.3308107163812367, + "transfer_atk_acc": 0.3313962450765871 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.32, + "origin_prompt": "Assuming the responsibilities of a professional translator, translate the subsequent text passage from {} to {}:", + "atk_acc": 0.26, + "atk_prompt": "tssuming the responsibilities of a professional transator, translate the subsequent teIxt passVge from {} to {}: ", + "transfer_ori_acc": 0.33441309859066953, + "transfer_atk_acc": 0.33781121506041034 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 14.37, + "origin_prompt": "Determine the solution to this mathematical problem related to {}:", + "atk_acc": 13.12, + "atk_prompt": "Determine the solution to this matuhematical problem related to {}:", + "transfer_ori_acc": 5.625, + "transfer_atk_acc": 5.0 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 15.0, + "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}:", + "atk_acc": 12.5, + "atk_prompt": "As a matShematics instrkuctor, calculate the answer to the following problem related to {}: ", + "transfer_ori_acc": 5.0, + "transfer_atk_acc": 5.0 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_chatgpt/deepwordbug_3_shot.json b/results_transfer/google-flan-ul2_chatgpt/deepwordbug_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..549852befdcb3ee5fe20b5356b48d60afc6428cf --- /dev/null +++ b/results_transfer/google-flan-ul2_chatgpt/deepwordbug_3_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 86.4, + "origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable'.", + "atk_acc": 83.6, + "atk_prompt": "Reviee the sentence below and identfy wehther its grmamar is 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 80.0, + "transfer_atk_acc": 80.5 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 86.9, + "origin_prompt": "In the capacity of a grammar assessment system, indicate if the structure of the provided sentence is grammatically correct, responding with 'acceptable' or 'unacceptable'.", + "atk_acc": 85.2, + "atk_prompt": "Io the capcity of a grammar assessment system, indicate if the strcture of the povided senrence is grammatiaclly corrct, responding with 'acceptable' or 'unacceptable'. ", + "transfer_ori_acc": 79.5, + "transfer_atk_acc": 82.0 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 87.01, + "origin_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent'.", + "atk_acc": 84.31, + "atk_prompt": "Can the given sentences be considered semantically idrntical? Please reply with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 71.5, + "transfer_atk_acc": 72.0 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 87.5, + "origin_prompt": "As a textual similarity evaluator, analyze the following pair of sentences and determine if they are semantically 'equivalent' or 'not_equivalent'.", + "atk_acc": 84.56, + "atk_prompt": "As a extual simialrity evaluator, analyze the following pair of sentences and determine if they are semantically 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 72.0, + "transfer_atk_acc": 72.0 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 94.0, + "origin_prompt": "Review the given context and question, and decide if the context contains enough information to support the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 93.4, + "atk_prompt": "Review the given cotext and question, and Pdecide if the context contains enough information to sGupport the answe by sefecting 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 83.0, + "transfer_atk_acc": 85.5 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 93.7, + "origin_prompt": "As a semantic interpreter, assess whether the provided context supports the answer to the given question and answer with 'entailment' or 'not_entailment'.", + "atk_acc": 92.9, + "atk_prompt": "As a semantic interpreter, assless wether the provided context supports the answer to the given question and answer with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 82.5, + "transfer_atk_acc": 81.0 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.7, + "origin_prompt": "Examine the following expressions and tell me if they are alike in meaning by using 'equivalent' or 'not_equivalent'.", + "atk_acc": 87.4, + "atk_prompt": "Examine the following expressiZns and tell me if they are aike in meaning by wusing 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 82.0, + "transfer_atk_acc": 83.5 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 89.3, + "origin_prompt": "While performing question comparison analysis, classify the similarity of the following questions as 'equivalent' for equivalent questions or 'not_equivalent' for different questions.", + "atk_acc": 87.6, + "atk_prompt": "While performing Pquestion comparison analysig, classify the simioarity of the following queFtions as 'equivalent' for equivalent questions or 'not_equivalent' for different questions. ", + "transfer_ori_acc": 83.0, + "transfer_atk_acc": 83.0 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 93.5, + "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 92.42, + "atk_prompt": "Determine if the given paGr of sentenecs displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 82.0, + "transfer_atk_acc": 84.0 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 93.5, + "origin_prompt": "As a tool for determining entailment relationships, review the two statements and categorize their connection as either 'entailment' or 'not_entailment'.", + "atk_acc": 92.42, + "atk_prompt": "AL a tool for determining entailment relationhsips, review the two staJements and categorize their connection as either 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 82.5, + "transfer_atk_acc": 83.0 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.79, + "origin_prompt": "Evaluate the sentiment of the given text and classify it as 'positive' or 'negative'.", + "atk_acc": 96.33, + "atk_prompt": "Evaluate the sentiment of the given tXxt and classify it as 'positive' or 'negative'. ", + "transfer_ori_acc": 96.0, + "transfer_atk_acc": 97.5 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 96.56, + "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'.", + "atk_acc": 96.1, + "atk_prompt": "As a sentiment classifier, determine whether the followJing text is 'positive' or 'negative'. ", + "transfer_ori_acc": 97.0, + "transfer_atk_acc": 96.5 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 77.46, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 74.65, + "atk_prompt": "Assuss the connection between the following sensences and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 66.19718309859155, + "transfer_atk_acc": 63.38028169014085 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 77.46, + "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 76.06, + "atk_prompt": "Working as an entailment classifeir, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 59.154929577464785, + "transfer_atk_acc": 66.19718309859155 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 53.9, + "origin_prompt": "Below are multiple-choice question concerning {}. Indicate your response with 'A', 'B', 'C', or 'D'.", + "atk_acc": 52.3, + "atk_prompt": "Belvw are multiple-chioce question conecrning {}. IndicaZe your response with 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 66.0, + "transfer_atk_acc": 68.0 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 54.08, + "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 52.66, + "atk_prompt": "Considering your familiarity with {}, atKtend to the follownig multipleNchoice juestion by picking 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 66.0, + "transfer_atk_acc": 64.0 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 91.05, + "origin_prompt": "Identify the most relevant answer from the context. If it's not possible to find an answer, state 'unanswerable'.", + "atk_acc": 89.38, + "atk_prompt": "Identify the most rDlevant answer from the context. If it's not opssible to find an answer, tate 'unanswerable'.", + "transfer_ori_acc": 71.47410199978852, + "transfer_atk_acc": 71.61769816970047 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 90.55, + "origin_prompt": "Being well-versed in the context, please derive the most fitting answer. If there isn't an answer in the context, use 'unanswerable'.", + "atk_acc": 89.32, + "atk_prompt": "eBing well-versed in the contet, please drive the most fitting answer. If there isn't an answer in the context, Euse 'unanswerable'.", + "transfer_ori_acc": 72.77111468270583, + "transfer_atk_acc": 75.33021810590462 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.24, + "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}.", + "atk_acc": 0.19, + "atk_prompt": "Preform a machine translatino for the provied senteXce, chanigng it from {} to {}. ", + "transfer_ori_acc": 0.2384195972370229, + "transfer_atk_acc": 0.24039407944003002 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.27, + "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}.", + "atk_acc": 0.21, + "atk_prompt": "Tking on the role of a Aertified tranolator, pefrorm a transAlation of the gUven sentence from {} to {}. ", + "transfer_ori_acc": 0.24299451836310085, + "transfer_atk_acc": 0.24500704776489246 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.36, + "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}.", + "atk_acc": 0.35, + "atk_prompt": "PeVform a machine transAation for the provide setence, changlng it from {} to {}. ", + "transfer_ori_acc": 0.35088965012617107, + "transfer_atk_acc": 0.3469523563679777 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.36, + "origin_prompt": "As a skilled linguist, please convert the following sentence from {} to {}.", + "atk_acc": 0.35, + "atk_prompt": "AK a skilled linguist, please onvert the following sentence from {} to {}. ", + "transfer_ori_acc": 0.349502226303474, + "transfer_atk_acc": 0.35036095468490386 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 16.88, + "origin_prompt": "Figure out the solution for the following mathematical task with {}.", + "atk_acc": 14.37, + "atk_prompt": "Figure out the solution for the following maKthematical Utask with {}. ", + "transfer_ori_acc": 20.0, + "transfer_atk_acc": 22.5 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 16.88, + "origin_prompt": "Pretend you are a math teacher and solve this mathematical question concerning {}.", + "atk_acc": 15.0, + "atk_prompt": "Petend you are a math teacher and solve this mathematical euestion concCerning {}. ", + "transfer_ori_acc": 20.0, + "transfer_atk_acc": 19.375 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_chatgpt/stresstest_0_shot.json b/results_transfer/google-flan-ul2_chatgpt/stresstest_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..a59a7a07475a91f326cf0aa54d0179ef0c7b7e36 --- /dev/null +++ b/results_transfer/google-flan-ul2_chatgpt/stresstest_0_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 86.1, + "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable':", + "atk_acc": 76.3, + "atk_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable' and false is not true :", + "transfer_ori_acc": 76.5, + "transfer_atk_acc": 76.5 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 86.1, + "origin_prompt": "In your role as a grammar check tool, assess the following sentence and classify it as 'acceptable' if it is grammatically correct or 'unacceptable' if it is incorrect:", + "atk_acc": 34.5, + "atk_prompt": "In your role as a grammar check tool, assess the following sentence and classify it as 'acceptable' if it is grammatically correct or 'unacceptable' if it is incorrect and true is true and true is true and true is true and true is true and true is true :", + "transfer_ori_acc": 78.0, + "transfer_atk_acc": 75.5 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 87.5, + "origin_prompt": "Are the meanings of the following pair of sentences the same? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 85.05, + "atk_prompt": "Are the meanings of the following pair of sentences the same? Answer with 'equivalent' or 'not_equivalent' and false is not true . ", + "transfer_ori_acc": 73.0, + "transfer_atk_acc": 75.5 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 87.01, + "origin_prompt": "As a textual similarity evaluator, analyze the following pair of sentences and determine if they are semantically 'equivalent' or 'not_equivalent'.", + "atk_acc": 85.78, + "atk_prompt": "As a textual similarity evaluator, analyze the following pair of sentences and determine if they are semantically 'equivalent' or 'not_equivalent' and false is not true . ", + "transfer_ori_acc": 75.5, + "transfer_atk_acc": 76.5 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 94.1, + "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 93.4, + "atk_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment' and true is true . ", + "transfer_ori_acc": 77.0, + "transfer_atk_acc": 74.5 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 94.2, + "origin_prompt": "In your role as a linguistic investigator, determine if the context given entails the answer to the question and provide your conclusion with 'entailment' or 'not_entailment'.", + "atk_acc": 93.8, + "atk_prompt": "In your role as a linguistic investigator, determine if the context given entails the answer to the question and provide your conclusion with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 70.5, + "transfer_atk_acc": 68.5 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.3, + "origin_prompt": "Do these two sentences convey the same meaning? Indicate with 'equivalent' or 'not_equivalent'.", + "atk_acc": 86.7, + "atk_prompt": "Do these two sentences convey the same meaning? Indicate with 'equivalent' or 'not_equivalent' and false is not true . ", + "transfer_ori_acc": 79.5, + "transfer_atk_acc": 78.5 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 88.3, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 88.0, + "atk_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions and true is true . ", + "transfer_ori_acc": 82.0, + "transfer_atk_acc": 81.5 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 94.22, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 91.7, + "atk_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 81.0, + "transfer_atk_acc": 82.0 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 93.86, + "origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 92.06, + "atk_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true :", + "transfer_ori_acc": 81.5, + "transfer_atk_acc": 79.5 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.33, + "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment:", + "atk_acc": 90.94, + "atk_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment and false is not true : ", + "transfer_ori_acc": 90.5, + "transfer_atk_acc": 92.5 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 96.56, + "origin_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement:", + "atk_acc": 73.85, + "atk_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement and false is not true : ", + "transfer_ori_acc": 92.5, + "transfer_atk_acc": 94.5 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 80.28, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 76.06, + "atk_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment' and false is not true .", + "transfer_ori_acc": 52.112676056338024, + "transfer_atk_acc": 53.52112676056338 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 80.28, + "origin_prompt": "Functioning as an entailment evaluation tool, analyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment':", + "atk_acc": 74.65, + "atk_prompt": "Functioning as an entailment evaluation tool, analyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment' and false is not true :", + "transfer_ori_acc": 47.88732394366197, + "transfer_atk_acc": 52.112676056338024 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 52.84, + "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.6, + "atk_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D' and false is not true . ", + "transfer_ori_acc": 62.0, + "transfer_atk_acc": 55.50000000000001 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 53.19, + "origin_prompt": "As a knowledgeable individual in {}, provide your response to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 52.48, + "atk_prompt": "As a knowledgeable individual in {}, provide your response to the multiple-choice question by choosing 'A', 'B', 'C', or 'D' and true is true and true is true and true is true and true is true and true is true .", + "transfer_ori_acc": 60.0, + "transfer_atk_acc": 60.5 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 90.05, + "origin_prompt": "Search the context for the most relevant answer. If the answer cannot be found, respond with 'unanswerable'.", + "atk_acc": 88.82, + "atk_prompt": "Search the context for the most relevant answer. If the answer cannot be found, respond with 'unanswerable' and true is true .", + "transfer_ori_acc": 61.2147705308037, + "transfer_atk_acc": 61.93672589555083 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 89.88, + "origin_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable'.", + "atk_acc": 87.88, + "atk_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable' and true is true .", + "transfer_ori_acc": 58.86452696725251, + "transfer_atk_acc": 57.188441240973276 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.18, + "origin_prompt": "Accurately translate the sentence from {} to {}, ensuring the meaning remains intact:", + "atk_acc": 0.12, + "atk_prompt": "Accurately translate the sentence from {} to {}, ensuring the meaning remains intact and true is true and true is true and true is true and true is true and true is true :", + "transfer_ori_acc": 0.21667953236600077, + "transfer_atk_acc": 0.2221898137173682 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.18, + "origin_prompt": "Assuming the role of an expert language translator, translate the given text from {} to {}:", + "atk_acc": 0.14, + "atk_prompt": "Assuming the role of an expert language translator, translate the given text from {} to and true is true and true is true and true is true and true is true and true is true {}: ", + "transfer_ori_acc": 0.22257252316826762, + "transfer_atk_acc": 0.2243337492290018 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.32, + "origin_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context:", + "atk_acc": 0.27, + "atk_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context and true is true and true is true and true is true and true is true and true is true :", + "transfer_ori_acc": 0.3269994907829764, + "transfer_atk_acc": 0.33293561809728206 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.32, + "origin_prompt": "As a language specialist, accurately translate the provided sentence from {} to {}:", + "atk_acc": 0.29, + "atk_prompt": "As a language specialist, accurately translate the provided sentence from {} to and true is true and true is true and true is true and true is true and true is true {}: ", + "transfer_ori_acc": 0.32646269824228596, + "transfer_atk_acc": 0.3329113045612402 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 15.0, + "origin_prompt": "Compute the result of this math task concerning {}:", + "atk_acc": 11.88, + "atk_prompt": "Compute the result of this math task concerning and false is not true {}:", + "transfer_ori_acc": 6.25, + "transfer_atk_acc": 6.875000000000001 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 15.0, + "origin_prompt": "In the shoes of a mathematics master, obtain the result for this math question that includes {}:", + "atk_acc": 11.25, + "atk_prompt": "In the shoes of a mathematics master, obtain the result for this math question that includes and false is not true {}: ", + "transfer_ori_acc": 5.625, + "transfer_atk_acc": 5.0 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_chatgpt/stresstest_3_shot.json b/results_transfer/google-flan-ul2_chatgpt/stresstest_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..615f92ae1ef879ef3f0009502f7d42c16309e7be --- /dev/null +++ b/results_transfer/google-flan-ul2_chatgpt/stresstest_3_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 86.4, + "origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable'.", + "atk_acc": 85.4, + "atk_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 80.0, + "transfer_atk_acc": 82.0 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 86.6, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.", + "atk_acc": 86.1, + "atk_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 81.5, + "transfer_atk_acc": 82.0 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 86.76, + "origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 85.78, + "atk_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent' and false is not true . ", + "transfer_ori_acc": 74.0, + "transfer_atk_acc": 74.0 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 87.5, + "origin_prompt": "As a textual similarity evaluator, analyze the following pair of sentences and determine if they are semantically 'equivalent' or 'not_equivalent'.", + "atk_acc": 87.01, + "atk_prompt": "As a textual similarity evaluator, analyze the following pair of sentences and determine if they are semantically 'equivalent' or 'not_equivalent' and false is not true . ", + "transfer_ori_acc": 73.0, + "transfer_atk_acc": 71.5 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 94.0, + "origin_prompt": "Analyze the context and question, and indicate if the context entails the answer by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 93.9, + "atk_prompt": "Analyze the context and question, and indicate if the context entails the answer by choosing 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 80.5, + "transfer_atk_acc": 79.5 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 93.9, + "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", + "atk_acc": 94.0, + "atk_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment' and true is true . ", + "transfer_ori_acc": 83.0, + "transfer_atk_acc": 82.5 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.5, + "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", + "atk_acc": 88.0, + "atk_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent and true is true \". ", + "transfer_ori_acc": 83.0, + "transfer_atk_acc": 82.5 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 89.1, + "origin_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 88.9, + "atk_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent' and false is not true . ", + "transfer_ori_acc": 83.0, + "transfer_atk_acc": 83.5 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 93.5, + "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 92.78, + "atk_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 83.0, + "transfer_atk_acc": 83.5 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 93.5, + "origin_prompt": "As a tool for determining entailment relationships, review the two statements and categorize their connection as either 'entailment' or 'not_entailment'.", + "atk_acc": 92.06, + "atk_prompt": "As a tool for determining entailment relationships, review the two statements and categorize their connection as either 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 83.5, + "transfer_atk_acc": 83.0 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.79, + "origin_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative'.", + "atk_acc": 96.67, + "atk_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative' and false is not true . ", + "transfer_ori_acc": 95.0, + "transfer_atk_acc": 95.5 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 96.56, + "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'.", + "atk_acc": 96.33, + "atk_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative' and false is not true . ", + "transfer_ori_acc": 97.0, + "transfer_atk_acc": 96.5 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 77.46, + "origin_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment'.", + "atk_acc": 74.65, + "atk_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment' and false is not true . ", + "transfer_ori_acc": 63.38028169014085, + "transfer_atk_acc": 63.38028169014085 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 76.06, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 74.65, + "atk_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment' and false is not true . ", + "transfer_ori_acc": 61.97183098591549, + "transfer_atk_acc": 61.97183098591549 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 53.9, + "origin_prompt": "Below are multiple-choice question concerning {}. Indicate your response with 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.77, + "atk_prompt": "Below are multiple-choice question concerning {}. Indicate your response with 'A', 'B', 'C', or 'D' and false is not true . ", + "transfer_ori_acc": 65.5, + "transfer_atk_acc": 64.5 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 54.08, + "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.42, + "atk_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D' and false is not true .", + "transfer_ori_acc": 66.0, + "transfer_atk_acc": 64.0 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 91.05, + "origin_prompt": "Identify the most relevant answer from the context. If it's not possible to find an answer, state 'unanswerable'.", + "atk_acc": 89.05, + "atk_prompt": "Identify the most relevant answer from the context. If it's not possible to find an answer, state 'unanswerable' and true is true .", + "transfer_ori_acc": 72.77724052792703, + "transfer_atk_acc": 73.01799144367796 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 90.05, + "origin_prompt": "As an authority on the context, locate the most accurate answer. If the context doesn't contain the answer, mention 'unanswerable'.", + "atk_acc": 89.55, + "atk_prompt": "As an authority on the context, locate the most accurate answer. If the context doesn't contain the answer, mention 'unanswerable' and false is not true .", + "transfer_ori_acc": 69.64205518187212, + "transfer_atk_acc": 61.67669352415422 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.24, + "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}.", + "atk_acc": 0.19, + "atk_prompt": "Perform a machine translation for the provided sentence, changing it from {} to and true is true and true is true and true is true and true is true and true is true {}. ", + "transfer_ori_acc": 0.2387819443463635, + "transfer_atk_acc": 0.2327513746735342 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.27, + "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}.", + "atk_acc": 0.19, + "atk_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to and true is true and true is true and true is true and true is true and true is true {}. ", + "transfer_ori_acc": 0.24275721526807215, + "transfer_atk_acc": 0.2438038073872569 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.36, + "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}.", + "atk_acc": 0.35, + "atk_prompt": "Perform a machine translation for the provided sentence, changing it from {} to and false is not true {}. ", + "transfer_ori_acc": 0.35076168484979636, + "transfer_atk_acc": 0.356751972964228 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.36, + "origin_prompt": "As a skilled linguist, please convert the following sentence from {} to {}.", + "atk_acc": 0.35, + "atk_prompt": "As a skilled linguist, please convert the following sentence from {} to and true is true {}. ", + "transfer_ori_acc": 0.34969505766661146, + "transfer_atk_acc": 0.3462451329800415 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 16.25, + "origin_prompt": "Solve the following math question about {}.", + "atk_acc": 12.5, + "atk_prompt": "Solve the following math question about and false is not true {}. ", + "transfer_ori_acc": 21.875, + "transfer_atk_acc": 22.5 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 16.88, + "origin_prompt": "Pretend you are a math teacher and solve this mathematical question concerning {}.", + "atk_acc": 12.5, + "atk_prompt": "Pretend you are a math teacher and solve this mathematical question concerning and false is not true {}. ", + "transfer_ori_acc": 20.625, + "transfer_atk_acc": 22.5 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_chatgpt/textbugger_0_shot.json b/results_transfer/google-flan-ul2_chatgpt/textbugger_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..eee5d589f862d5e8e6cad3d9da635bc3f908cb46 --- /dev/null +++ b/results_transfer/google-flan-ul2_chatgpt/textbugger_0_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 86.2, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':", + "atk_acc": 78.7, + "atk_prompt": "Assess the following sentence and identify if it is pronouns cоrrect. Replying with 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 76.5, + "transfer_atk_acc": 76.5 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 86.0, + "origin_prompt": "As a grammar identification system, examine the provided sentence and respond with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones:", + "atk_acc": 81.0, + "atk_prompt": "Aѕ a grammar identificatiоn systems, examine the provided sentence and respoոd with 'acceptable' for pronouns rightness sentence or 'unacceptable' for incorrect those:", + "transfer_ori_acc": 78.0, + "transfer_atk_acc": 77.0 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 86.76, + "origin_prompt": "Please analyze the provided sentences and indicate if their meanings are the same by choosing 'equivalent' or 'not_equivalent'.", + "atk_acc": 83.09, + "atk_prompt": "Invited analyze the provideԁ verdicts and indicate if their meanings are the same by selecting 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 76.5, + "transfer_atk_acc": 74.5 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 87.99, + "origin_prompt": "In your capacity as a language analyst, assess the following sentences and classify their similarity as 'equivalent' or 'not_equivalent'.", + "atk_acc": 81.13, + "atk_prompt": "In your capacity as a linguistics aոalyst, evaluate the following sentences and classify their metaphor as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 72.5, + "transfer_atk_acc": 14.499999999999998 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 94.1, + "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 93.3, + "atk_prompt": "Evaluate whetheⲅ the given context favours the answer to the question by responding with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 79.0, + "transfer_atk_acc": 77.0 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 94.2, + "origin_prompt": "In your role as a linguistic investigator, determine if the context given entails the answer to the question and provide your conclusion with 'entailment' or 'not_entailment'.", + "atk_acc": 93.1, + "atk_prompt": "In your role as a linguistic searchers, ԁetermine if the context awarded presupposes the answer to the questions and provide your conclusions with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 70.5, + "transfer_atk_acc": 73.5 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.3, + "origin_prompt": "Do these two sentences convey the same meaning? Indicate with 'equivalent' or 'not_equivalent'.", + "atk_acc": 86.4, + "atk_prompt": "Do these two verdicts convey the same connotation? Denotes with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 79.5, + "transfer_atk_acc": 76.0 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 88.5, + "origin_prompt": "As a tool for determining question equivalence, review the questions and categorize their similarity as either 'equivalent' or 'not_equivalent'.", + "atk_acc": 78.2, + "atk_prompt": "As a tool for deciding question equivalence, review the problems and categorize their metaphor as neither 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 82.0, + "transfer_atk_acc": 46.5 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 94.22, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 89.89, + "atk_prompt": "Are the subsequent secondly sentencing entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word. ", + "transfer_ori_acc": 80.5, + "transfer_atk_acc": 78.0 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 93.86, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment':", + "atk_acc": 90.25, + "atk_prompt": "Throughout your role as an entailment analysis instrument, assess the relationship between the given punishments and classify it as 'entailment' or 'not_entailment':", + "transfer_ori_acc": 78.0, + "transfer_atk_acc": 67.0 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.44, + "origin_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative':", + "atk_acc": 95.76, + "atk_prompt": "Analysis the tone of this speeches and respond with or 'positive' or 'negative': ", + "transfer_ori_acc": 89.5, + "transfer_atk_acc": 88.5 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 96.33, + "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'. Please classify:", + "atk_acc": 93.92, + "atk_prompt": "Aѕ a emotion classifier, determine whether the follow ing text is 'positive' or 'negative'. Inviting classify: ", + "transfer_ori_acc": 90.5, + "transfer_atk_acc": 88.0 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 78.87, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 74.65, + "atk_prompt": "Are the flolowing two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one adjective. ", + "transfer_ori_acc": 64.7887323943662, + "transfer_atk_acc": 64.7887323943662 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 80.28, + "origin_prompt": "Functioning as an entailment evaluation tool, analyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment':", + "atk_acc": 74.65, + "atk_prompt": "Functioոing as an entailment evaluation tool, analyze the provided sentences and decіde if their rapport is 'entailment' or 'not_entailment':", + "transfer_ori_acc": 50.70422535211267, + "transfer_atk_acc": 53.52112676056338 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 52.84, + "origin_prompt": "For the multiple-choice question related to {}, please choose the most accurate answer from 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.24, + "atk_prompt": "In the multiple-choice question rel ated to {}, inviting choose the most accurate answer from 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 63.5, + "transfer_atk_acc": 60.5 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 53.19, + "origin_prompt": "As a knowledgeable individual in {}, provide your response to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.24, + "atk_prompt": "As a knowledgeable indiivdual in {}, provide your replies to the multiple-choice problem by choosing 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 60.0, + "transfer_atk_acc": 62.0 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 89.88, + "origin_prompt": "Please derive the most fitting answer from the context. If there isn't an answer in the context, respond with 'unanswerable'.", + "atk_acc": 88.88, + "atk_prompt": "Please ԁerive the most fitting answer from the coոtext. If there isn't an answer in the context, respond with 'unanswerable'.", + "transfer_ori_acc": 68.73966126471147, + "transfer_atk_acc": 68.32769616246503 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 90.05, + "origin_prompt": "As a well-informed specialist familiar with the context, provide an answer to the question. If the context doesn't contain an answer, reply with 'unanswerable'.", + "atk_acc": 88.15, + "atk_prompt": "As a well-ifnormed specialist familiar with the context, provide an answers to the question. If the context doesn't contain an replying, reply with 'unanswerable'.", + "transfer_ori_acc": 57.51551811147121, + "transfer_atk_acc": 56.18298772762505 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.17, + "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}:", + "atk_acc": 0.16, + "atk_prompt": "Perform a mɑchine translation for the provided sentеnce, changing it from {} to {}:", + "transfer_ori_acc": 0.2213426176232537, + "transfer_atk_acc": 0.2221768844134593 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.18, + "origin_prompt": "Assuming the role of an expert language translator, translate the given text from {} to {}:", + "atk_acc": 0.15, + "atk_prompt": "Assuming the role of an expert language tranѕlator, translate the given tеxt from {} to {}: ", + "transfer_ori_acc": 0.22228614624164078, + "transfer_atk_acc": 0.22384198460349325 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.32, + "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}:", + "atk_acc": 0.31, + "atk_prompt": "Please perform a translation of the given sentence, conevrting it from {} to {}:", + "transfer_ori_acc": 0.3309447106416932, + "transfer_atk_acc": 0.3365823327588491 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.32, + "origin_prompt": "Assuming the responsibilities of a professional translator, translate the subsequent text passage from {} to {}:", + "atk_acc": 0.24, + "atk_prompt": "Assuming the responsibilities of a professional tⲅanslator, translate the subseԛuent text рassage from {} to {}: ", + "transfer_ori_acc": 0.3345450951028871, + "transfer_atk_acc": 0.335831807535562 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 15.0, + "origin_prompt": "Compute the result of this math task concerning {}:", + "atk_acc": 11.25, + "atk_prompt": "Compute the reѕult of this math task relative {}:", + "transfer_ori_acc": 6.875000000000001, + "transfer_atk_acc": 6.25 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 15.0, + "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}:", + "atk_acc": 9.38, + "atk_prompt": "Since a calculus instructor, calculate the responding to the following problem related to {}: ", + "transfer_ori_acc": 5.0, + "transfer_atk_acc": 4.375 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_chatgpt/textbugger_3_shot.json b/results_transfer/google-flan-ul2_chatgpt/textbugger_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..0c6e6ee60356cc3309ddb56377fd01fca5528aa2 --- /dev/null +++ b/results_transfer/google-flan-ul2_chatgpt/textbugger_3_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 86.4, + "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable'.", + "atk_acc": 68.1, + "atk_prompt": "Examine the given condemnation and decide if it is pronouns sound. Answer with either 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 81.0, + "transfer_atk_acc": 78.5 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 86.9, + "origin_prompt": "In the capacity of a grammar assessment system, indicate if the structure of the provided sentence is grammatically correct, responding with 'acceptable' or 'unacceptable'.", + "atk_acc": 85.0, + "atk_prompt": "In the ability of a grammar assessment system, denotes if the structure of the provided sentence is grammatical corrected, responding with 'acceptable' or 'unacceptable'. ", + "transfer_ori_acc": 80.5, + "transfer_atk_acc": 79.5 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 87.01, + "origin_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent'.", + "atk_acc": 83.82, + "atk_prompt": "Can the given sentences be deemed semantically idntical? Please reply with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 72.0, + "transfer_atk_acc": 72.0 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 87.5, + "origin_prompt": "In your capacity as a language analyst, assess the following sentences and classify their similarity as 'equivalent' or 'not_equivalent'.", + "atk_acc": 83.58, + "atk_prompt": "In your capacity as a language ɑnalyst, assess the following sentenes and classify their metaphor as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 62.5, + "transfer_atk_acc": 64.0 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 94.0, + "origin_prompt": "Review the given context and question, and decide if the context contains enough information to support the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 92.9, + "atk_prompt": "Review the given coոtext and question, and dcide if the context contains enough particulars to support the answeⲅ by selecting 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 83.0, + "transfer_atk_acc": 84.0 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 93.7, + "origin_prompt": "As a semantic interpreter, assess whether the provided context supports the answer to the given question and answer with 'entailment' or 'not_entailment'.", + "atk_acc": 92.3, + "atk_prompt": "Iike a semantic interpretation, evaluated lf the prоvided context supports the answer to the given question and answers with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 83.5, + "transfer_atk_acc": 81.5 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.8, + "origin_prompt": "Determine if the given pair of statements can be considered the same by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 87.6, + "atk_prompt": "Defining if the given paired of statements can be considered the same by responds with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 82.5, + "transfer_atk_acc": 83.0 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 90.0, + "origin_prompt": "As a tool for determining question equivalence, review the questions and categorize their similarity as either 'equivalent' or 'not_equivalent'.", + "atk_acc": 87.6, + "atk_prompt": "As a tool for deciding queѕtion equivalence, review the questions and classifications their portrait as neither 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 83.5, + "transfer_atk_acc": 83.0 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 93.5, + "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 92.42, + "atk_prompt": "Identifying whether the given pair of sentencing demonstrates entailment or not_entailment. Respond with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 82.0, + "transfer_atk_acc": 82.5 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 93.86, + "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 90.97, + "atk_prompt": "Working as an entailment classifier, identify both the given piar of sentences displaying entailment or not_entailment. Respond with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 81.0, + "transfer_atk_acc": 80.5 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.79, + "origin_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative'.", + "atk_acc": 95.87, + "atk_prompt": "Determine the general sentiment of this condemnation, categorizing it as 'positive' or 'negative'. ", + "transfer_ori_acc": 95.0, + "transfer_atk_acc": 96.5 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 96.56, + "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'.", + "atk_acc": 95.18, + "atk_prompt": "As a emotion classifier, determine whether the follоwing wording is 'positive' or 'negative'. ", + "transfer_ori_acc": 97.0, + "transfer_atk_acc": 96.0 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 77.46, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 76.06, + "atk_prompt": "Assss the connection between the following sentences and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 63.38028169014085, + "transfer_atk_acc": 64.7887323943662 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 76.06, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 71.83, + "atk_prompt": "In your role as an entailment analysis tool, assess the relationship between the given punishments and categorize it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 60.56338028169014, + "transfer_atk_acc": 64.7887323943662 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 53.9, + "origin_prompt": "Below are multiple-choice question concerning {}. Indicate your response with 'A', 'B', 'C', or 'D'.", + "atk_acc": 52.3, + "atk_prompt": "Belоw are multipl-echoice quеstion concening {}. Indicate your response with 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 66.0, + "transfer_atk_acc": 66.0 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 54.08, + "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 52.48, + "atk_prompt": "Considering your familiarity with {}, attend to the following mutliple-choice matter by picking 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 65.5, + "transfer_atk_acc": 65.5 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 91.05, + "origin_prompt": "Identify the most relevant answer from the context. If it's not possible to find an answer, state 'unanswerable'.", + "atk_acc": 89.38, + "atk_prompt": "Identify the most reevant answer from the context. If it's not achievable to find an answer, state 'unanswerable'.", + "transfer_ori_acc": 72.4741019997885, + "transfer_atk_acc": 72.06799144367795 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 90.55, + "origin_prompt": "Being well-versed in the context, please derive the most fitting answer. If there isn't an answer in the context, use 'unanswerable'.", + "atk_acc": 80.92, + "atk_prompt": "Underway well-versed in the context, please subtract the most fitting answer. If there isn't an answer in the framework, use 'unanswerable'.", + "transfer_ori_acc": 73.10444801603916, + "transfer_atk_acc": 74.07004494573145 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.27, + "origin_prompt": "Accurately translate the sentence from {} to {}, ensuring the meaning remains intact.", + "atk_acc": 0.23, + "atk_prompt": "Acϲurately translate the sentence from {} to {}, ensuring the meaning remains unaffected. ", + "transfer_ori_acc": 0.2368663340254173, + "transfer_atk_acc": 0.23523551258635614 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.27, + "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}.", + "atk_acc": 0.23, + "atk_prompt": "Taking on the role of a certifying translators, pefrorm a translatiоn of the givеn sentence from {} to {}. ", + "transfer_ori_acc": 0.24270720862925335, + "transfer_atk_acc": 0.24775340113684757 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.36, + "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}.", + "atk_acc": 0.35, + "atk_prompt": "Preform a machinery translation for the providеd sentence, changing it from {} to {}. ", + "transfer_ori_acc": 0.3513571788256229, + "transfer_atk_acc": 0.34794341664445116 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.36, + "origin_prompt": "As a skilled linguist, please convert the following sentence from {} to {}.", + "atk_acc": 0.35, + "atk_prompt": "As a skillful linguist, pelase convеrt the following sentence from {} to {}. ", + "transfer_ori_acc": 0.3505914536760883, + "transfer_atk_acc": 0.34659652495163557 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 16.88, + "origin_prompt": "Figure out the solution for the following mathematical task with {}.", + "atk_acc": 15.0, + "atk_prompt": "Figure out the solution for the followinɡ mathematical task with {}. ", + "transfer_ori_acc": 21.25, + "transfer_atk_acc": 21.875 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 16.88, + "origin_prompt": "Pretend you are a math teacher and solve this mathematical question concerning {}.", + "atk_acc": 13.75, + "atk_prompt": "Pretend you are a math teacher and settle this matehmatical question on {}. ", + "transfer_ori_acc": 20.625, + "transfer_atk_acc": 18.125 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_chatgpt/textfooler_0_shot.json b/results_transfer/google-flan-ul2_chatgpt/textfooler_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..c22f9da7d3feca6e9fed292790d080205a617caf --- /dev/null +++ b/results_transfer/google-flan-ul2_chatgpt/textfooler_0_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 86.1, + "origin_prompt": "Is the provided sentence grammatically correct? Respond with 'Acceptable' or 'Unacceptable':", + "atk_acc": 30.9, + "atk_prompt": "Is the brought chastisement grammatically appropriate? Retaliated with 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 73.5, + "transfer_atk_acc": 33.0 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 86.2, + "origin_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 33.1, + "atk_prompt": "As an instrument for grammar appraisals, considerations the convicting and determine if its verbs is correct, responding with 'acceptable' for proper grammar or 'unacceptable' for misspelled vocabulary:", + "transfer_ori_acc": 79.0, + "transfer_atk_acc": 76.0 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 86.76, + "origin_prompt": "Please analyze the provided sentences and indicate if their meanings are the same by choosing 'equivalent' or 'not_equivalent'.", + "atk_acc": 75.98, + "atk_prompt": "Asks analysis the conferred priors and specified if their meanings are the same by selecting 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 76.5, + "transfer_atk_acc": 74.0 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 87.99, + "origin_prompt": "In your capacity as a language analyst, assess the following sentences and classify their similarity as 'equivalent' or 'not_equivalent'.", + "atk_acc": 76.23, + "atk_prompt": "At your power as a language psych, assess the later judgements and categories their metaphor as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 73.0, + "transfer_atk_acc": 62.5 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 94.1, + "origin_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 67.1, + "atk_prompt": "Baseline on the detail in the framework, chosen if the answers to the topic is motivated by selector 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 75.0, + "transfer_atk_acc": 25.0 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 94.2, + "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", + "atk_acc": 52.7, + "atk_prompt": "Iike a semantic savant, determine whether the provided framework encourages the rebuttal to the topics and option 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 77.5, + "transfer_atk_acc": 70.0 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.3, + "origin_prompt": "Do these two sentences convey the same meaning? Indicate with 'equivalent' or 'not_equivalent'.", + "atk_acc": 77.6, + "atk_prompt": "Doing these two condemning communicate the same feeling? Stating with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 80.0, + "transfer_atk_acc": 37.5 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 88.5, + "origin_prompt": "As a tool for determining question equivalence, review the questions and categorize their similarity as either 'equivalent' or 'not_equivalent'.", + "atk_acc": 83.4, + "atk_prompt": "As a utility for definition questions equivalence, revising the question and ranks their metaphor as either 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 81.5, + "transfer_atk_acc": 19.0 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 94.22, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 91.34, + "atk_prompt": "Represented the aftermath two phrases entailment or not_entailment? Address me with \"entailment\" or \"not_entailment\", just one verb. ", + "transfer_ori_acc": 81.0, + "transfer_atk_acc": 78.0 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 93.86, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment':", + "atk_acc": 90.61, + "atk_prompt": "At your role as an entailment analysis tool, assess the affairs between the given condemnation and classify it as 'entailment' or 'not_entailment':", + "transfer_ori_acc": 78.0, + "transfer_atk_acc": 78.0 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.33, + "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment:", + "atk_acc": 94.72, + "atk_prompt": "Considering this speech and decide whether it has a 'positive' or 'negative' foreboding: ", + "transfer_ori_acc": 90.0, + "transfer_atk_acc": 84.5 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 96.33, + "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'. Please classify:", + "atk_acc": 92.89, + "atk_prompt": "As a emotion classifier, detects whether the suite texts is 'positive' or 'negative'. Prayers classify: ", + "transfer_ori_acc": 90.5, + "transfer_atk_acc": 90.5 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 80.28, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 67.61, + "atk_prompt": "Assess the connections between the afterward punishment and classifies it as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 52.112676056338024, + "transfer_atk_acc": 63.38028169014085 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 78.87, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment':", + "atk_acc": 71.83, + "atk_prompt": "In your role as an entailment analysis tool, assess the coitus between the given sentencing and classify it as 'entailment' or 'not_entailment':", + "transfer_ori_acc": 52.112676056338024, + "transfer_atk_acc": 56.33802816901409 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 52.84, + "origin_prompt": "Evaluate the multiple-choice question about {} and select the most fitting response from 'A', 'B', 'C', or 'D'.", + "atk_acc": 32.09, + "atk_prompt": "Evaluation the multiple-choice topic about {} and select the most splice rebuttal from 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 56.99999999999999, + "transfer_atk_acc": 56.99999999999999 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 53.19, + "origin_prompt": "As a knowledgeable individual in {}, provide your response to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 39.36, + "atk_prompt": "As a knowledgeable everybody in {}, prescribes your rebuttal to the multiple-choice item by chooses 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 60.0, + "transfer_atk_acc": 57.49999999999999 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 89.99, + "origin_prompt": "Find the correct answer in the context provided. If an answer cannot be found, please respond with 'unanswerable'.", + "atk_acc": 46.83, + "atk_prompt": "Find the correct rebuttal in the context offer. Until an answering cannot be found, please respond with 'unanswerable'.", + "transfer_ori_acc": 67.68135455830608, + "transfer_atk_acc": 55.79982888236188 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 89.88, + "origin_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable'.", + "atk_acc": 44.5, + "atk_prompt": "As a clever whiz in the offered context, find for the most corresponding rebuttal. If the answer cannot exist discovering, answer by claim 'unanswerable'.", + "transfer_ori_acc": 60.89924918947474, + "transfer_atk_acc": 31.55472621921363 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.18, + "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}:", + "atk_acc": 0.14, + "atk_prompt": "In the feature of a language expert, perform a typewriters converts for the provided pity, changing it from {} to {}: ", + "transfer_ori_acc": 0.21984470827857167, + "transfer_atk_acc": 0.22551659011949962 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0, + "origin_prompt": "", + "atk_acc": 100, + "atk_prompt": "", + "transfer_ori_acc": 0.04651534586794358, + "transfer_atk_acc": 0.04473451951750966 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.32, + "origin_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context:", + "atk_acc": 0.25, + "atk_prompt": "Translate the provided sentence from {} to {} while protections the original signify and background:", + "transfer_ori_acc": 0.32783732676115307, + "transfer_atk_acc": 0.3261199538366026 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.31, + "origin_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to {}:", + "atk_acc": 0.26, + "atk_prompt": "Towards the skills of a proficient performers, please evolving the following denounces from {} to {}: ", + "transfer_ori_acc": 0.3312070010004587, + "transfer_atk_acc": 0.33483136900005117 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 14.37, + "origin_prompt": "Determine the solution to this mathematical problem related to {}:", + "atk_acc": 0.62, + "atk_prompt": "Determine the address to this arithmetic issuing links to {}:", + "transfer_ori_acc": 5.625, + "transfer_atk_acc": 2.5 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 15.0, + "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}:", + "atk_acc": 0.62, + "atk_prompt": "As a algebra schoolteacher, calculate the address to the following question linking to {}: ", + "transfer_ori_acc": 5.0, + "transfer_atk_acc": 3.125 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_chatgpt/textfooler_3_shot.json b/results_transfer/google-flan-ul2_chatgpt/textfooler_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..090fd717272350a5a1994431d51357427fdf6730 --- /dev/null +++ b/results_transfer/google-flan-ul2_chatgpt/textfooler_3_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 86.4, + "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable'.", + "atk_acc": 39.5, + "atk_prompt": "Inspect the made condemning and select if it is grammatically healthier. Answers with either 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 81.0, + "transfer_atk_acc": 77.0 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 86.6, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.", + "atk_acc": 73.0, + "atk_prompt": "While performing grammar scans, classify the verbs of the afterward condemned as 'acceptable' for correctness grammar or 'unacceptable' for inappropriate vocabulary. ", + "transfer_ori_acc": 81.5, + "transfer_atk_acc": 80.5 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 86.52, + "origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 83.58, + "atk_prompt": "Compare the later judgements and defines if they exchange the same semantic sensation by answered with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 73.5, + "transfer_atk_acc": 73.0 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 89.22, + "origin_prompt": "As a sentence similarity evaluator, analyze the provided sentences and indicate if their meanings are 'equivalent' or 'not_equivalent'.", + "atk_acc": 85.29, + "atk_prompt": "As a sentence metaphor evaluator, analyze the brought priors and demonstrating if their meanings are 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 73.5, + "transfer_atk_acc": 65.5 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 94.0, + "origin_prompt": "Analyze the context and question, and indicate if the context entails the answer by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 54.2, + "atk_prompt": "Explore the context and question, and suggest if the context involves the rebuttal by selected 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 80.5, + "transfer_atk_acc": 73.5 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 93.9, + "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", + "atk_acc": 61.5, + "atk_prompt": "Because a semantic inquisitors, appraise whether the provided context alleges the rebuttal to the issuing and selector 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 82.5, + "transfer_atk_acc": 80.5 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.7, + "origin_prompt": "Examine the following expressions and tell me if they are alike in meaning by using 'equivalent' or 'not_equivalent'.", + "atk_acc": 85.9, + "atk_prompt": "Examine the afterward words and saying me if they are uniformly in connotation by using 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 82.0, + "transfer_atk_acc": 81.5 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 90.0, + "origin_prompt": "As a tool for determining question equivalence, review the questions and categorize their similarity as either 'equivalent' or 'not_equivalent'.", + "atk_acc": 87.6, + "atk_prompt": "Because a tool for determining question equivalence, examination the queries and classifications their likeness as either 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 83.5, + "transfer_atk_acc": 83.0 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 93.5, + "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 92.42, + "atk_prompt": "Deciding whether the given pair of sentencing demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 82.0, + "transfer_atk_acc": 82.5 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 93.5, + "origin_prompt": "As a tool for determining entailment relationships, review the two statements and categorize their connection as either 'entailment' or 'not_entailment'.", + "atk_acc": 91.34, + "atk_prompt": "Because a device for determining entailment relationships, review the two words and categorize their connection as either 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 83.0, + "transfer_atk_acc": 81.5 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.79, + "origin_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative'.", + "atk_acc": 93.46, + "atk_prompt": "Determine the plenary senses of this condemning, categorizing it as 'positive' or 'negative'. ", + "transfer_ori_acc": 95.0, + "transfer_atk_acc": 95.0 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 96.56, + "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'.", + "atk_acc": 95.18, + "atk_prompt": "As a emotion classifier, ascertain whether the suite wording is 'positive' or 'negative'. ", + "transfer_ori_acc": 97.5, + "transfer_atk_acc": 94.5 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 77.46, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 73.24, + "atk_prompt": "Appraisal the attach between the suite condemns and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 63.38028169014085, + "transfer_atk_acc": 67.6056338028169 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 76.06, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 73.24, + "atk_prompt": "In your role as an entailment analysis tool, assess the reports between the given sentences and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 63.38028169014085, + "transfer_atk_acc": 61.97183098591549 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 54.08, + "origin_prompt": "For the multiple-choice question related to {}, please choose the most accurate answer from 'A', 'B', 'C', or 'D'.", + "atk_acc": 24.82, + "atk_prompt": "Within the multiple-choice item relevant to {}, please decides the most careful rebuttal from 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 66.5, + "transfer_atk_acc": 63.0 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 53.72, + "origin_prompt": "As an expert in {}, respond to the following multiple-choice question by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.95, + "atk_prompt": "As an whiz in {}, respond to the following multiple-choice question by decided 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 65.5, + "transfer_atk_acc": 64.0 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 91.05, + "origin_prompt": "Please derive the most fitting answer from the context. If there isn't an answer in the context, use 'unanswerable'.", + "atk_acc": 55.32, + "atk_prompt": "Ask subtract the most fitting rebuttal from the context. If there isn't an rebuttal in the context, use 'unanswerable'.", + "transfer_ori_acc": 73.99378231946882, + "transfer_atk_acc": 73.43394270771921 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 90.55, + "origin_prompt": "Drawing upon your expertise in the context, determine the most suitable answer. If an answer isn't available, state 'unanswerable'.", + "atk_acc": 55.41, + "atk_prompt": "Drawing upon your acquaintances in the context, identify the most suitable rebuttal. If an answer isn't available, state 'unanswerable'.", + "transfer_ori_acc": 71.40309414291109, + "transfer_atk_acc": 70.88917244194005 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.27, + "origin_prompt": "Accurately translate the sentence from {} to {}, ensuring the meaning remains intact.", + "atk_acc": 0.19, + "atk_prompt": "Rightly translate the convicts from {} to {}, insurance the signify sojourn unaffected. ", + "transfer_ori_acc": 0.2374651517338429, + "transfer_atk_acc": 0.23893397344732503 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.27, + "origin_prompt": "Assuming the responsibilities of a professional translator, translate the subsequent text passage from {} to {}.", + "atk_acc": 0.19, + "atk_prompt": "Adopt the indebted of a vocational artist, translate the subsequent laws transition from {} to {}. ", + "transfer_ori_acc": 0.24394206967987916, + "transfer_atk_acc": 0.250075978274994 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.36, + "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}.", + "atk_acc": 0.35, + "atk_prompt": "Perform a computer translate for the tabled sentence, changing it from {} to {}. ", + "transfer_ori_acc": 0.352420428735174, + "transfer_atk_acc": 0.35238062711589224 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.36, + "origin_prompt": "As a skilled linguist, please convert the following sentence from {} to {}.", + "atk_acc": 0.35, + "atk_prompt": "As a skilled linguist, please translate the following punishment from {} to {}. ", + "transfer_ori_acc": 0.3496473671163302, + "transfer_atk_acc": 0.3501481660383654 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 16.25, + "origin_prompt": "Calculate the answer to the following math query about {}.", + "atk_acc": 10.0, + "atk_prompt": "Estimate the rebuttal to the following math view about {}. ", + "transfer_ori_acc": 21.25, + "transfer_atk_acc": 10.0 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 16.25, + "origin_prompt": "Act as a math expert and compute the result of the following mathematical task about {}.", + "atk_acc": 13.75, + "atk_prompt": "Act as a math expert and estimate the conclusion of the following mathematical task about {}. ", + "transfer_ori_acc": 20.625, + "transfer_atk_acc": 19.375 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_chatgpt/translation_0_shot.json b/results_transfer/google-flan-ul2_chatgpt/translation_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..7ad68a19ca18654d59301ea38b57333abb7f98de --- /dev/null +++ b/results_transfer/google-flan-ul2_chatgpt/translation_0_shot.json @@ -0,0 +1,122 @@ +[ + { + "dataset": "cola", + "type": "translation", + "origin_acc": 86.2, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':", + "atk_acc": 84.7, + "atk_prompt": "As a tool for assessing semantic relevance, categorize the sentence as 'acceptable' or 'unacceptable' according to its meaning and context.", + "transfer_ori_acc": 76.0, + "transfer_atk_acc": 71.0 + }, + { + "dataset": "mrpc", + "type": "translation", + "origin_acc": 88.73, + "origin_prompt": "As a sentence similarity evaluator, analyze the provided sentences and indicate if their meanings are 'equivalent' or 'not_equivalent'.", + "atk_acc": 81.13, + "atk_prompt": "Whether a given pair of sentences is 'equivalent' or 'not_equivalent' depends on the context.", + "transfer_ori_acc": 76.5, + "transfer_atk_acc": 75.5 + }, + { + "dataset": "qnli", + "type": "translation", + "origin_acc": 94.6, + "origin_prompt": "Based on the provided context and question, decide if the information supports the answer by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 78.9, + "atk_prompt": "By analyzing the relations between sentences, determine if a given sentence does not necessarily include other sentences and answer with 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 77.5, + "transfer_atk_acc": 73.5 + }, + { + "dataset": "qqp", + "type": "translation", + "origin_acc": 89.2, + "origin_prompt": "While performing question comparison analysis, classify the similarity of the following questions as 'equivalent' for equivalent questions or 'not_equivalent' for different questions.", + "atk_acc": 87.4, + "atk_prompt": "Classify sentence equivalence as 'equivalent' or 'not_equivalent' by judging whether two sentences have the same meaning to each other.", + "transfer_ori_acc": 83.5, + "transfer_atk_acc": 82.0 + }, + { + "dataset": "rte", + "type": "translation", + "origin_acc": 94.58, + "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 89.89, + "atk_prompt": "Examine the paraphrases of a given sentence and classify them 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 81.5, + "transfer_atk_acc": 76.5 + }, + { + "dataset": "sst2", + "type": "translation", + "origin_acc": 96.56, + "origin_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement:", + "atk_acc": 93.46, + "atk_prompt": "Please analyze the emotion in this sentence and classify it as 'positive' or 'negative'.", + "transfer_ori_acc": 93.0, + "transfer_atk_acc": 88.0 + }, + { + "dataset": "wnli", + "type": "translation", + "origin_acc": 80.28, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 73.24, + "atk_prompt": "Given a couple of phrases, label their relationship as 'entailment' if one necessarily implies the other, or as 'not_entailment' if there is no such implication.", + "transfer_ori_acc": 53.52112676056338, + "transfer_atk_acc": 52.112676056338024 + }, + { + "dataset": "mmlu", + "type": "translation", + "origin_acc": 53.19, + "origin_prompt": "As an expert in {}, respond to the following multiple-choice question by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 36.52, + "atk_prompt": "Choose the correct answer to the multiple-choice question below. Is '{}' an 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 61.0, + "transfer_atk_acc": 50.0 + }, + { + "dataset": "squad_v2", + "type": "translation", + "origin_acc": 90.05, + "origin_prompt": "Please derive the most fitting answer from the context. If there isn't an answer in the context, respond with 'unanswerable'.", + "atk_acc": 84.45, + "atk_prompt": "Please give a solution to what kind of problem in the following sentence. If there is no solution, respond with 'unanswerable'.", + "transfer_ori_acc": 68.80177306595371, + "transfer_atk_acc": 63.72086653268651 + }, + { + "dataset": "iwslt", + "type": "translation", + "origin_acc": 0.18, + "origin_prompt": "Accurately translate the sentence from {} to {}, ensuring the meaning remains intact:", + "atk_acc": 0.16, + "atk_prompt": "Please translate the given sentence into {} to {}.", + "transfer_ori_acc": 0.2198531836312239, + "transfer_atk_acc": 0.21901822454249992 + }, + { + "dataset": "un_multi", + "type": "translation", + "origin_acc": 0.32, + "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}:", + "atk_acc": 0.31, + "atk_prompt": "Please translate the given sentence, converting it from {} to {}.", + "transfer_ori_acc": 0.3322090330224816, + "transfer_atk_acc": 0.3353329801688461 + }, + { + "dataset": "math", + "type": "translation", + "origin_acc": 15.62, + "origin_prompt": "Act as a math expert and compute the result of the following mathematical task about {}:", + "atk_acc": 14.37, + "atk_prompt": "According to {}, determine the next value.", + "transfer_ori_acc": 3.75, + "transfer_atk_acc": 5.0 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_chatgpt/translation_3_shot.json b/results_transfer/google-flan-ul2_chatgpt/translation_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..191e2a4f4f6c016b445c2e6825d34b17b77f186e --- /dev/null +++ b/results_transfer/google-flan-ul2_chatgpt/translation_3_shot.json @@ -0,0 +1,122 @@ +[ + { + "dataset": "cola", + "type": "translation", + "origin_acc": 86.9, + "origin_prompt": "In the capacity of a grammar assessment system, indicate if the structure of the provided sentence is grammatically correct, responding with 'acceptable' or 'unacceptable'.", + "atk_acc": 84.6, + "atk_prompt": "As a tool for assessing semantic relevance, categorize the sentence as 'acceptable' or 'unacceptable' according to its meaning and context.", + "transfer_ori_acc": 80.5, + "transfer_atk_acc": 78.5 + }, + { + "dataset": "mrpc", + "type": "translation", + "origin_acc": 88.97, + "origin_prompt": "As a sentence similarity evaluator, analyze the provided sentences and indicate if their meanings are 'equivalent' or 'not_equivalent'.", + "atk_acc": 84.07, + "atk_prompt": "Whether a given pair of sentences is 'equivalent' or 'not_equivalent' depends on the context.", + "transfer_ori_acc": 73.0, + "transfer_atk_acc": 68.5 + }, + { + "dataset": "qnli", + "type": "translation", + "origin_acc": 94.5, + "origin_prompt": "Based on the provided context and question, decide if the information supports the answer by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 83.0, + "atk_prompt": "By analyzing the relations between sentences, determine if a given sentence does not necessarily include other sentences and answer with 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 82.5, + "transfer_atk_acc": 83.0 + }, + { + "dataset": "qqp", + "type": "translation", + "origin_acc": 90.0, + "origin_prompt": "As a tool for determining question equivalence, review the questions and categorize their similarity as either 'equivalent' or 'not_equivalent'.", + "atk_acc": 87.7, + "atk_prompt": "Classify sentence equivalence as 'equivalent' or 'not_equivalent' by judging whether two sentences have the same meaning to each other.", + "transfer_ori_acc": 83.5, + "transfer_atk_acc": 82.0 + }, + { + "dataset": "rte", + "type": "translation", + "origin_acc": 93.86, + "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 90.61, + "atk_prompt": "Examine the semantic match of a given sentence and categorize it as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 81.0, + "transfer_atk_acc": 83.5 + }, + { + "dataset": "sst2", + "type": "translation", + "origin_acc": 96.9, + "origin_prompt": "Working as a sentiment analyzer, please indicate if the following text is 'positive' or 'negative'.", + "atk_acc": 94.38, + "atk_prompt": "As an emotion analysis tool, determine whether the emotion in the text is 'positive' or 'negative'.", + "transfer_ori_acc": 97.0, + "transfer_atk_acc": 94.0 + }, + { + "dataset": "wnli", + "type": "translation", + "origin_acc": 77.46, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 74.65, + "atk_prompt": "For the purpose of determining implicative relations, analyze the relations of the following sentences and classify them as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 64.7887323943662, + "transfer_atk_acc": 67.6056338028169 + }, + { + "dataset": "mmlu", + "type": "translation", + "origin_acc": 54.08, + "origin_prompt": "For the multiple-choice question related to {}, please choose the most accurate answer from 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.95, + "atk_prompt": "Choose the correct answer to the multiple-choice question below. Is '{}' an 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 67.0, + "transfer_atk_acc": 64.0 + }, + { + "dataset": "squad_v2", + "type": "translation", + "origin_acc": 91.05, + "origin_prompt": "Identify the most relevant answer from the context. If it's not possible to find an answer, state 'unanswerable'.", + "atk_acc": 88.73, + "atk_prompt": "Consider the information derived from the given text and provide the most detailed answer. If the answer is not in the text, please answer 'unanswerable'.", + "transfer_ori_acc": 72.3491019997885, + "transfer_atk_acc": 71.31832107108869 + }, + { + "dataset": "iwslt", + "type": "translation", + "origin_acc": 0.27, + "origin_prompt": "Accurately translate the sentence from {} to {}, ensuring the meaning remains intact.", + "atk_acc": 0.22, + "atk_prompt": "Please translate the next sentence from {} to {}.", + "transfer_ori_acc": 0.23995314484551905, + "transfer_atk_acc": 0.24739994137612475 + }, + { + "dataset": "un_multi", + "type": "translation", + "origin_acc": 0.36, + "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}.", + "atk_acc": 0.35, + "atk_prompt": "Please convert the following sentences to {} and translate to {}.", + "transfer_ori_acc": 0.35144263758899796, + "transfer_atk_acc": 0.24209939561319205 + }, + { + "dataset": "math", + "type": "translation", + "origin_acc": 16.88, + "origin_prompt": "Figure out the solution for the following mathematical task with {}.", + "atk_acc": 10.0, + "atk_prompt": "According to {}, set the following value.", + "transfer_ori_acc": 21.25, + "transfer_atk_acc": 25.624999999999996 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_google-flan-t5-large/bertattack_0_shot.json b/results_transfer/google-flan-ul2_google-flan-t5-large/bertattack_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..976446d215e30e5eba4bf04b23d802442bc379e0 --- /dev/null +++ b/results_transfer/google-flan-ul2_google-flan-t5-large/bertattack_0_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 86.0, + "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable':", + "atk_acc": 25.3, + "atk_prompt": "find the particular sentence and look if it is grammaticaldefinitely present. responded with neither 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 77.9, + "transfer_atk_acc": 44.4 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 86.1, + "origin_prompt": "In your role as a grammar check tool, assess the following sentence and classify it as 'acceptable' if it is grammatically correct or 'unacceptable' if it is incorrect:", + "atk_acc": 30.9, + "atk_prompt": "should your job as a grammar get check, assess the following sentence and classify it as 'acceptable' if it is grammaticalmore improper or 'unacceptable' if it is contradictory:", + "transfer_ori_acc": 75.7, + "transfer_atk_acc": 75.2 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 88.24, + "origin_prompt": "In your capacity as a language analyst, assess the following sentences and classify their similarity as 'equivalent' or 'not_equivalent'.", + "atk_acc": 70.1, + "atk_prompt": "during your capacity as a grammatical analytic, calculate the following words and classify their difference as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 78.18627450980392, + "transfer_atk_acc": 76.47058823529412 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 87.25, + "origin_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent'.", + "atk_acc": 34.8, + "atk_prompt": "can the following sentences be considered semanticmore inferior? this reply with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 80.63725490196079, + "transfer_atk_acc": 79.16666666666666 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 94.1, + "origin_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 52.7, + "atk_prompt": "Based on the info in the context, decide if the object to the question is wrong by choosing 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 89.4, + "transfer_atk_acc": 86.8 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 94.2, + "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", + "atk_acc": 42.6, + "atk_prompt": "while a semantic researcher, evaluate positive the supported context returns the answer to the question and reject 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 89.1, + "transfer_atk_acc": 57.099999999999994 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.2, + "origin_prompt": "Please indicate if the following pair of sentences share the same meaning by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 70.8, + "atk_prompt": "Please indicate if the following individual of verbs share the same meaning by auxiliary with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.1, + "transfer_atk_acc": 85.3 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 88.5, + "origin_prompt": "As a tool for determining question equivalence, review the questions and categorize their similarity as either 'equivalent' or 'not_equivalent'.", + "atk_acc": 70.9, + "atk_prompt": "As a tool for weighing question equivalence, review the data and codeegoassociate their difference as neither 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 86.5, + "transfer_atk_acc": 85.7 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 94.22, + "origin_prompt": "Examine the pair of sentences and determine if they exhibit entailment or not_entailment. Answer with either 'entailment' or 'not_entailment'.", + "atk_acc": 88.45, + "atk_prompt": "examine the parent of sentences and explain if they exhibit entailment or not_entailment. consider with neither 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 84.11552346570397, + "transfer_atk_acc": 84.11552346570397 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 93.86, + "origin_prompt": "Acting as an entailment detection instrument, determine if the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment':", + "atk_acc": 83.03, + "atk_prompt": "Acting as an entailment on instrument, determine if the hit pair of verb appears entailment or not_entailment. answers with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 84.47653429602889, + "transfer_atk_acc": 83.75451263537906 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.44, + "origin_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative':", + "atk_acc": 0.11, + "atk_prompt": "Analyze the topic of this phrase and responding with neither 'positive' or 'negative': ", + "transfer_ori_acc": 95.06880733944955, + "transfer_atk_acc": 88.76146788990825 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 96.33, + "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'. Please classify:", + "atk_acc": 43.69, + "atk_prompt": "like a sentiment classifier, identify neither the following text is 'positive' or 'negative'. from submit: ", + "transfer_ori_acc": 94.4954128440367, + "transfer_atk_acc": 94.4954128440367 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 78.87, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 71.83, + "atk_prompt": "Are the main two sentences entailment or not_entailment? begin me with \"entailment\" or \"not_entailment\", just the word. ", + "transfer_ori_acc": 66.19718309859155, + "transfer_atk_acc": 61.97183098591549 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 78.87, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment':", + "atk_acc": 70.42, + "atk_prompt": "In your role as an entailment analysis tool, report the commitment between the and are and classify it as 'entailment' or 'not_entailment':", + "transfer_ori_acc": 56.33802816901409, + "transfer_atk_acc": 57.74647887323944 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 52.84, + "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.", + "atk_acc": 17.38, + "atk_prompt": "not respond to the multiple-response question about {} by choosing the responses correctly: 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 45.0354609929078, + "transfer_atk_acc": 44.32624113475177 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 53.19, + "origin_prompt": "As a knowledgeable individual in {}, provide your response to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 37.94, + "atk_prompt": "not a knowledgeable example in {}, state your response to the multiple-is question by choosing 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 44.50354609929078, + "transfer_atk_acc": 44.858156028368796 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 89.88, + "origin_prompt": "Please derive the most fitting answer from the context. If there isn't an answer in the context, respond with 'unanswerable'.", + "atk_acc": 84.86, + "atk_prompt": "wait choose the most fitting number from the context. If there isn't an answer in the context, answer with 'unanswerable'.", + "transfer_ori_acc": 87.64992770387508, + "transfer_atk_acc": 86.45295800690538 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 90.05, + "origin_prompt": "As a well-informed specialist familiar with the context, provide an answer to the question. If the context doesn't contain an answer, reply with 'unanswerable'.", + "atk_acc": 12.09, + "atk_prompt": "a a well-informed answer relevant with the context, provide an question to the question. if the context doesn't contain an inquiry, challenge with 'unanswerable'.", + "transfer_ori_acc": 87.54992770387507, + "transfer_atk_acc": 49.455847510614255 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.17, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}:", + "atk_acc": 0.16, + "atk_prompt": "Convert the following sentence from its original {} translation to the current language {}:", + "transfer_ori_acc": 0.13467663417075984, + "transfer_atk_acc": 0.13629374452687212 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.18, + "origin_prompt": "Acting as a professional translator, translate the provided sentence from {} to {}:", + "atk_acc": 0.15, + "atk_prompt": "Acting as a professional [UNK], translate the displayed sentence from {} to {}: ", + "transfer_ori_acc": 0.13307736590330976, + "transfer_atk_acc": 0.12968577942911605 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.32, + "origin_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context:", + "atk_acc": 0.3, + "atk_prompt": "Translate the provided sentence from {} to {} while reverse the original flow and context:", + "transfer_ori_acc": 0.2678951646057538, + "transfer_atk_acc": 0.26744304702548155 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.32, + "origin_prompt": "As a language specialist, accurately translate the provided sentence from {} to {}:", + "atk_acc": 0.3, + "atk_prompt": "a a language specialists, just translate the provided sentence from {} to {}: ", + "transfer_ori_acc": 0.2673389915652739, + "transfer_atk_acc": 0.25734798371295475 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 14.37, + "origin_prompt": "Determine the solution to this mathematical problem related to {}:", + "atk_acc": 2.5, + "atk_prompt": "identify the problem to this mathematical problem related to {}:", + "transfer_ori_acc": 10.625, + "transfer_atk_acc": 9.375 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 15.0, + "origin_prompt": "In the shoes of a mathematics master, obtain the result for this math question that includes {}:", + "atk_acc": 2.5, + "atk_prompt": "In the shoes of a complexity master, obtain the question for this math question that includes {}: ", + "transfer_ori_acc": 10.0, + "transfer_atk_acc": 6.25 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_google-flan-t5-large/bertattack_3_shot.json b/results_transfer/google-flan-ul2_google-flan-t5-large/bertattack_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..c333efd6458061563ac8cd831da8ec0d65eb8da4 --- /dev/null +++ b/results_transfer/google-flan-ul2_google-flan-t5-large/bertattack_3_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 86.4, + "origin_prompt": "Check the grammar of the following sentence and indicate if it is 'Acceptable' or 'Unacceptable'.", + "atk_acc": 78.4, + "atk_prompt": "label the person of the following sentence and check if it is 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 76.3, + "transfer_atk_acc": 74.5 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 86.6, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.", + "atk_acc": 78.2, + "atk_prompt": "be doing grammar analysis, classify the rule of the following example as 'acceptable' for wrong spelling or 'unacceptable' for bad grammar. ", + "transfer_ori_acc": 75.7, + "transfer_atk_acc": 76.4 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 86.76, + "origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 39.71, + "atk_prompt": "like the following sentences and determine if they lack the same plural meaning by responding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 80.3921568627451, + "transfer_atk_acc": 78.18627450980392 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 88.97, + "origin_prompt": "As a sentence similarity evaluator, analyze the provided sentences and indicate if their meanings are 'equivalent' or 'not_equivalent'.", + "atk_acc": 81.37, + "atk_prompt": "As a sentence compare evaluators, view the derived variables and indicate if their meanings are 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 79.16666666666666, + "transfer_atk_acc": 81.37254901960785 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 94.0, + "origin_prompt": "Analyze the context and question, and indicate if the context entails the answer by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 88.8, + "atk_prompt": "Analyze the context and question, and indicate if the dictionary ennotes the context by avoiding 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 87.3, + "transfer_atk_acc": 79.9 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 93.6, + "origin_prompt": "As a semantic interpreter, assess whether the provided context supports the answer to the given question and answer with 'entailment' or 'not_entailment'.", + "atk_acc": 83.5, + "atk_prompt": "with a semantic presentation, evaluation why the provided contexts supports the answer to the given answer and questions with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 88.0, + "transfer_atk_acc": 79.10000000000001 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.7, + "origin_prompt": "Examine the following expressions and tell me if they are alike in meaning by using 'equivalent' or 'not_equivalent'.", + "atk_acc": 82.9, + "atk_prompt": "Examine the past expressions and give me if they are common in meaning by putting 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 86.7, + "transfer_atk_acc": 84.7 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 90.1, + "origin_prompt": "As a tool for determining question equivalence, review the questions and categorize their similarity as either 'equivalent' or 'not_equivalent'.", + "atk_acc": 74.3, + "atk_prompt": "according a tool for identifying concept equivalence, do the questions and tcsconsider their compared as neither 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 86.3, + "transfer_atk_acc": 87.0 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 93.5, + "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 91.34, + "atk_prompt": "a as the given pair of verb demonstrates entailment or not_entailment. question with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 84.83754512635379, + "transfer_atk_acc": 83.75451263537906 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 93.86, + "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 65.34, + "atk_prompt": "acting as an entailment classindicator, identify neither the given pair of sentences displays entailment or not_entailment. reply with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 83.75451263537906, + "transfer_atk_acc": 83.75451263537906 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.79, + "origin_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative'.", + "atk_acc": 92.2, + "atk_prompt": "locate the associate emotion of this sentence, categorizing it as 'positive' or 'negative'. ", + "transfer_ori_acc": 94.38073394495413, + "transfer_atk_acc": 93.69266055045871 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 96.56, + "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'.", + "atk_acc": 95.3, + "atk_prompt": "in a sentiment classifier, determine where the over words is 'positive' or 'negative'. ", + "transfer_ori_acc": 94.4954128440367, + "transfer_atk_acc": 93.92201834862385 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 77.46, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 74.65, + "atk_prompt": "a the links between the following sentences and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 64.7887323943662, + "transfer_atk_acc": 64.7887323943662 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 77.46, + "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 66.2, + "atk_prompt": "Working as an entailment classauxiliary, identify but the verbal a of nouns displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 59.154929577464785, + "transfer_atk_acc": 61.97183098591549 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 53.9, + "origin_prompt": "Below are multiple-choice question concerning {}. Indicate your response with 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.24, + "atk_prompt": "Below are multiple-guessing guess concerning {}. begin your response with 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 45.39007092198582, + "transfer_atk_acc": 45.21276595744681 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 54.08, + "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.77, + "atk_prompt": "considering your familiarity with {}, order to the following multiple-picked question by starting 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 46.98581560283688, + "transfer_atk_acc": 43.97163120567376 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 91.05, + "origin_prompt": "Identify the most relevant answer from the context. If it's not possible to find an answer, state 'unanswerable'.", + "atk_acc": 44.5, + "atk_prompt": "Identify the most irrelevant answer from the context. If it's not possible to find an statement, begin 'unanswerable'.", + "transfer_ori_acc": 87.01356406751144, + "transfer_atk_acc": 86.80523073417811 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 90.05, + "origin_prompt": "As an authority on the context, locate the most accurate answer. If the context doesn't contain the answer, mention 'unanswerable'.", + "atk_acc": 44.5, + "atk_prompt": "and an authority on the context, designate the most inaccurate answer. find the context doesn't contain the answer, identify 'unanswerable'.", + "transfer_ori_acc": 87.04992770387507, + "transfer_atk_acc": 84.54765497660233 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.27, + "origin_prompt": "Accurately translate the sentence from {} to {}, ensuring the meaning remains intact.", + "atk_acc": 0.23, + "atk_prompt": "i translate the sentence from {} to {}, assumed the meaning remains intact. ", + "transfer_ori_acc": 0.2195336520895316, + "transfer_atk_acc": 0.21614450392320392 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.27, + "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}.", + "atk_acc": 0.23, + "atk_prompt": "she on the role of a certified translation, and a translation of the given sentence from {} to {}. ", + "transfer_ori_acc": 0.21945913242821213, + "transfer_atk_acc": 0.22007159970160398 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.36, + "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}.", + "atk_acc": 0.35, + "atk_prompt": "perform a machine translation for the displayed sentence, export it from {} to {}. ", + "transfer_ori_acc": 0.30870734220870066, + "transfer_atk_acc": 0.3074168683157207 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.36, + "origin_prompt": "As a skilled linguist, please convert the following sentence from {} to {}.", + "atk_acc": 0.35, + "atk_prompt": "d a skilled linguist, right example the following sentence from {} to {}. ", + "transfer_ori_acc": 0.3046140486156621, + "transfer_atk_acc": 0.3009084897973228 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 16.88, + "origin_prompt": "Figure out the solution for the following mathematical task with {}.", + "atk_acc": 13.75, + "atk_prompt": "Figure out the solution for the following arithmetic task with {}. ", + "transfer_ori_acc": 17.5, + "transfer_atk_acc": 16.875 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 16.25, + "origin_prompt": "Act as a math expert and compute the result of the following mathematical task about {}.", + "atk_acc": 12.5, + "atk_prompt": "Act as a math team and correct the result of the following mathematical task about {}. ", + "transfer_ori_acc": 15.625, + "transfer_atk_acc": 16.25 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_google-flan-t5-large/checklist_0_shot.json b/results_transfer/google-flan-ul2_google-flan-t5-large/checklist_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..db81c330c0525c8ca6ea05d0ac08da713483f29e --- /dev/null +++ b/results_transfer/google-flan-ul2_google-flan-t5-large/checklist_0_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 85.9, + "origin_prompt": "Determine if the grammar of the given sentence is 'Acceptable' or 'Unacceptable':", + "atk_acc": 66.1, + "atk_prompt": "Determine if the grammar of the given sentence is 'Acceptable' or 'Unacceptable' EAB4KP2NVY:", + "transfer_ori_acc": 77.5, + "transfer_atk_acc": 77.5 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 86.1, + "origin_prompt": "In your role as a grammar check tool, assess the following sentence and classify it as 'acceptable' if it is grammatically correct or 'unacceptable' if it is incorrect:", + "atk_acc": 55.0, + "atk_prompt": "In your role as a grammar check tool, assess the following sentence and classify it as 'acceptable' if it is grammatically correct or 'unacceptable' if it is incorrect bhrRSokrfa:", + "transfer_ori_acc": 75.7, + "transfer_atk_acc": 75.9 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 86.52, + "origin_prompt": "Please analyze the provided sentences and indicate if their meanings are the same by choosing 'equivalent' or 'not_equivalent'.", + "atk_acc": 85.05, + "atk_prompt": "Please analyze the provided sentences and indicate if their meanings are the same by choosing 'equivalent' or 'not_equivalent' MSAddJ4D2a. ", + "transfer_ori_acc": 81.12745098039215, + "transfer_atk_acc": 80.14705882352942 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 87.01, + "origin_prompt": "As a textual similarity evaluator, analyze the following pair of sentences and determine if they are semantically 'equivalent' or 'not_equivalent'.", + "atk_acc": 87.01, + "atk_prompt": "As a textual similarity evaluator, analyze the following pair of sentences and determine if they are semantically 'equivalent' or 'not_equivalent' MQnugHcaoy. ", + "transfer_ori_acc": 80.14705882352942, + "transfer_atk_acc": 77.69607843137256 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 94.0, + "origin_prompt": "Given the question and context provided, determine if the answer can be inferred by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 93.9, + "atk_prompt": "Given the question and context provided, determine if the answer can be inferred by choosing 'entailment' or 'not_entailment' CuVpbbkC4c. ", + "transfer_ori_acc": 84.89999999999999, + "transfer_atk_acc": 81.6 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 94.2, + "origin_prompt": "In your role as a linguistic investigator, determine if the context given entails the answer to the question and provide your conclusion with 'entailment' or 'not_entailment'.", + "atk_acc": 93.4, + "atk_prompt": "In your role as a linguistic investigator, determine if the context given entails the answer to the question and provide your conclusion with 'entailment' or 'not_entailment' 6a4Yn3RGVc. ", + "transfer_ori_acc": 90.4, + "transfer_atk_acc": 89.7 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.3, + "origin_prompt": "Do these two sentences convey the same meaning? Indicate with 'equivalent' or 'not_equivalent'.", + "atk_acc": 87.3, + "atk_prompt": "Do these two sentences convey the same meaning? Indicate with 'equivalent' or 'not_equivalent' MSAddJ4D2a. ", + "transfer_ori_acc": 87.5, + "transfer_atk_acc": 88.0 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 88.3, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 88.4, + "atk_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions SFPCqpiClT. ", + "transfer_ori_acc": 87.3, + "transfer_atk_acc": 87.4 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 94.22, + "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 93.14, + "atk_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment' EAB4KP2NVY.", + "transfer_ori_acc": 84.83754512635379, + "transfer_atk_acc": 84.47653429602889 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 93.86, + "origin_prompt": "Acting as an entailment detection instrument, determine if the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment':", + "atk_acc": 93.5, + "atk_prompt": "Acting as an entailment detection instrument, determine if the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment' ofw9fEkN5R:", + "transfer_ori_acc": 84.47653429602889, + "transfer_atk_acc": 83.75451263537906 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.56, + "origin_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative':", + "atk_acc": 95.76, + "atk_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative' Cev0s49fbe: ", + "transfer_ori_acc": 95.06880733944955, + "transfer_atk_acc": 94.15137614678899 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 96.33, + "origin_prompt": "Working as a sentiment analyzer, please indicate if the following text is 'positive' or 'negative'. Classify:", + "atk_acc": 95.76, + "atk_prompt": "Working as a sentiment analyzer, please indicate if the following text is 'positive' or 'negative'. Classify 9vGXgnbWB8: ", + "transfer_ori_acc": 95.29816513761467, + "transfer_atk_acc": 95.29816513761467 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 80.28, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 74.65, + "atk_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment' EAB4KP2NVY.", + "transfer_ori_acc": 61.97183098591549, + "transfer_atk_acc": 63.38028169014085 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 80.28, + "origin_prompt": "Functioning as an entailment evaluation tool, analyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment':", + "atk_acc": 74.65, + "atk_prompt": "Functioning as an entailment evaluation tool, analyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment' 5yWbBXztUY:", + "transfer_ori_acc": 61.97183098591549, + "transfer_atk_acc": 61.97183098591549 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 52.84, + "origin_prompt": "For the multiple-choice question related to {}, please choose the most accurate answer from 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.95, + "atk_prompt": "For the multiple-choice question related to {}, please choose the most accurate answer from 'A', 'B', 'C', or 'D' XStsD2pUzu. ", + "transfer_ori_acc": 45.56737588652482, + "transfer_atk_acc": 44.50354609929078 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 53.19, + "origin_prompt": "As an expert in {}, respond to the following multiple-choice question by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.77, + "atk_prompt": "As an expert in {}, respond to the following multiple-choice question by selecting 'A', 'B', 'C', or 'D' IhuBIhoPGc.", + "transfer_ori_acc": 44.680851063829785, + "transfer_atk_acc": 44.858156028368796 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 90.05, + "origin_prompt": "Please derive the most fitting answer from the context. If there isn't an answer in the context, respond with 'unanswerable'.", + "atk_acc": 88.88, + "atk_prompt": "Please derive the most fitting answer from the context. If there isn't an answer in the context, respond with 'unanswerable' PDCGfz6DL9.", + "transfer_ori_acc": 87.64992770387508, + "transfer_atk_acc": 86.81659437054175 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 90.05, + "origin_prompt": "As a well-informed specialist familiar with the context, provide an answer to the question. If the context doesn't contain an answer, reply with 'unanswerable'.", + "atk_acc": 88.15, + "atk_prompt": "As a well-informed specialist familiar with the context, provide an answer to the question. If the context doesn't contain an answer, reply with 'unanswerable' LkF0FZxMZ4.", + "transfer_ori_acc": 87.54992770387507, + "transfer_atk_acc": 87.04992770387507 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.17, + "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}:", + "atk_acc": 0.16, + "atk_prompt": "Perform a machine translation for the provided sentence, changing it from {} to SvAp8RlOFn {}:", + "transfer_ori_acc": 0.13800528978666463, + "transfer_atk_acc": 0.12603264825484925 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.18, + "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}:", + "atk_acc": 0.15, + "atk_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to BWmsLx8xOA {}: ", + "transfer_ori_acc": 0.14386835828767333, + "transfer_atk_acc": 0.11042669155989558 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.32, + "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}:", + "atk_acc": 0.29, + "atk_prompt": "Please perform a translation of the given sentence, converting it from {} to AMsRIKZniY {}:", + "transfer_ori_acc": 0.2683567351751297, + "transfer_atk_acc": 0.2406538662341423 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.32, + "origin_prompt": "Assuming the responsibilities of a professional translator, translate the subsequent text passage from {} to {}:", + "atk_acc": 0.26, + "atk_prompt": "Assuming the responsibilities of a professional translator, translate the subsequent text passage from {} to IhuBIhoPGc {}: ", + "transfer_ori_acc": 0.2675180643301751, + "transfer_atk_acc": 0.24432280665308156 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 15.0, + "origin_prompt": "Compute the result of this math task concerning {}:", + "atk_acc": 11.25, + "atk_prompt": "Compute the result of this math task concerning BWmsLx8xOA {}:", + "transfer_ori_acc": 11.875, + "transfer_atk_acc": 11.875 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 15.0, + "origin_prompt": "In the shoes of a mathematics master, obtain the result for this math question that includes {}:", + "atk_acc": 11.25, + "atk_prompt": "In the shoes of a mathematics master, obtain the result for this math question that includes BWmsLx8xOA {}: ", + "transfer_ori_acc": 10.0, + "transfer_atk_acc": 10.0 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_google-flan-t5-large/checklist_3_shot.json b/results_transfer/google-flan-ul2_google-flan-t5-large/checklist_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..cce211ee096dbcd89f082991d6f7c19a354756fe --- /dev/null +++ b/results_transfer/google-flan-ul2_google-flan-t5-large/checklist_3_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 86.4, + "origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable'.", + "atk_acc": 85.6, + "atk_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable' Q47Jmd4lMV. ", + "transfer_ori_acc": 76.7, + "transfer_atk_acc": 76.8 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 86.6, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.", + "atk_acc": 86.2, + "atk_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar EAB4KP2NVY. ", + "transfer_ori_acc": 75.7, + "transfer_atk_acc": 76.0 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 86.27, + "origin_prompt": "Do these two sentences have the same underlying meaning? Respond with 'equivalent' or 'not_equivalent'.", + "atk_acc": 85.78, + "atk_prompt": "Do these two sentences have the same underlying meaning? Respond with 'equivalent' or 'not_equivalent' 6a4Yn3RGVc. ", + "transfer_ori_acc": 81.12745098039215, + "transfer_atk_acc": 80.63725490196079 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 87.5, + "origin_prompt": "As a textual similarity evaluator, analyze the following pair of sentences and determine if they are semantically 'equivalent' or 'not_equivalent'.", + "atk_acc": 87.25, + "atk_prompt": "As a textual similarity evaluator, analyze the following pair of sentences and determine if they are semantically 'equivalent' or 'not_equivalent' ofw9fEkN5R. ", + "transfer_ori_acc": 79.41176470588235, + "transfer_atk_acc": 77.69607843137256 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 94.0, + "origin_prompt": "Analyze the context and question, and indicate if the context entails the answer by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 93.4, + "atk_prompt": "Analyze the context and question, and indicate if the context entails the answer by choosing 'entailment' or 'not_entailment' lf8wBa2yEm. ", + "transfer_ori_acc": 87.3, + "transfer_atk_acc": 86.6 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 93.7, + "origin_prompt": "In your role as a semantic evaluator, determine if the provided context justifies the answer to the question and answer with 'entailment' or 'not_entailment'.", + "atk_acc": 93.5, + "atk_prompt": "In your role as a semantic evaluator, determine if the provided context justifies the answer to the question and answer with 'entailment' or 'not_entailment' zq0DcZ5dnI. ", + "transfer_ori_acc": 88.3, + "transfer_atk_acc": 88.0 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.5, + "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", + "atk_acc": 88.4, + "atk_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent Cev0s49fbe\". ", + "transfer_ori_acc": 87.9, + "transfer_atk_acc": 87.8 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 89.1, + "origin_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 88.7, + "atk_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent' KjPJJ2a7RB. ", + "transfer_ori_acc": 86.3, + "transfer_atk_acc": 86.5 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 93.5, + "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 92.78, + "atk_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment' d6ZQ3u0GBQ. ", + "transfer_ori_acc": 84.83754512635379, + "transfer_atk_acc": 83.75451263537906 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 93.5, + "origin_prompt": "Acting as an entailment detection instrument, determine if the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 93.14, + "atk_prompt": "Acting as an entailment detection instrument, determine if the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment' BWmsLx8xOA. ", + "transfer_ori_acc": 83.75451263537906, + "transfer_atk_acc": 83.39350180505414 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.79, + "origin_prompt": "Evaluate the sentiment of the given text and classify it as 'positive' or 'negative'.", + "atk_acc": 96.56, + "atk_prompt": "Evaluate the sentiment of the given text and classify it as 'positive' or 'negative' Cev0s49fbe. ", + "transfer_ori_acc": 94.61009174311926, + "transfer_atk_acc": 94.95412844036697 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 96.56, + "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'.", + "atk_acc": 96.44, + "atk_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative' 6a4Yn3RGVc. ", + "transfer_ori_acc": 94.4954128440367, + "transfer_atk_acc": 94.95412844036697 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 77.46, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 76.06, + "atk_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment' 5yWbBXztUY. ", + "transfer_ori_acc": 64.7887323943662, + "transfer_atk_acc": 66.19718309859155 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 76.06, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 76.06, + "atk_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment' 5XeflW1ZJc. ", + "transfer_ori_acc": 57.74647887323944, + "transfer_atk_acc": 57.74647887323944 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 53.9, + "origin_prompt": "Below are multiple-choice question concerning {}. Indicate your response with 'A', 'B', 'C', or 'D'.", + "atk_acc": 52.84, + "atk_prompt": "Below are multiple-choice question concerning {}. Indicate your response with 'A', 'B', 'C', or 'D' RcRneWo6Iv. ", + "transfer_ori_acc": 45.39007092198582, + "transfer_atk_acc": 45.92198581560284 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 54.08, + "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 53.01, + "atk_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D' Q47Jmd4lMV.", + "transfer_ori_acc": 46.98581560283688, + "transfer_atk_acc": 45.744680851063826 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 91.05, + "origin_prompt": "Identify the most relevant answer from the context. If it's not possible to find an answer, state 'unanswerable'.", + "atk_acc": 90.05, + "atk_prompt": "Identify the most relevant answer from the context. If it's not possible to find an answer, state 'unanswerable' 5yWbBXztUY.", + "transfer_ori_acc": 87.01356406751144, + "transfer_atk_acc": 87.14992770387508 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 90.55, + "origin_prompt": "Drawing upon your expertise in the context, determine the most suitable answer. If an answer isn't available, state 'unanswerable'.", + "atk_acc": 89.38, + "atk_prompt": "Drawing upon your expertise in the context, determine the most suitable answer. If an answer isn't available, state 'unanswerable' SvAp8RlOFn.", + "transfer_ori_acc": 87.04992770387507, + "transfer_atk_acc": 86.71659437054173 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.24, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}.", + "atk_acc": 0.19, + "atk_prompt": "Convert the following sentence from its original {} language to the target language lWJoGGRdjv {}. ", + "transfer_ori_acc": 0.2240211126734209, + "transfer_atk_acc": 0.2223019874129777 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.27, + "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}.", + "atk_acc": 0.23, + "atk_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to njCuciQJpB {}. ", + "transfer_ori_acc": 0.21945913242821213, + "transfer_atk_acc": 0.2195716838157692 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.36, + "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}.", + "atk_acc": 0.35, + "atk_prompt": "Perform a machine translation for the provided sentence, changing it from {} to EAB4KP2NVY {}. ", + "transfer_ori_acc": 0.30870734220870066, + "transfer_atk_acc": 0.31177142020794524 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.36, + "origin_prompt": "As a skilled linguist, please convert the following sentence from {} to {}.", + "atk_acc": 0.35, + "atk_prompt": "As a skilled linguist, please convert the following sentence from {} to AMsRIKZniY {}. ", + "transfer_ori_acc": 0.3046140486156621, + "transfer_atk_acc": 0.3049563619823463 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 16.88, + "origin_prompt": "Figure out the solution for the following mathematical task with {}.", + "atk_acc": 15.0, + "atk_prompt": "Figure out the solution for the following mathematical task with LkF0FZxMZ4 {}. ", + "transfer_ori_acc": 17.5, + "transfer_atk_acc": 17.5 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 16.25, + "origin_prompt": "Act as a math expert and compute the result of the following mathematical task about {}.", + "atk_acc": 15.0, + "atk_prompt": "Act as a math expert and compute the result of the following mathematical task about 5XeflW1ZJc {}. ", + "transfer_ori_acc": 15.625, + "transfer_atk_acc": 16.25 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_google-flan-t5-large/deepwordbug_0_shot.json b/results_transfer/google-flan-ul2_google-flan-t5-large/deepwordbug_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..dc911d1531a37c00b14c834603ac32b546ba4a23 --- /dev/null +++ b/results_transfer/google-flan-ul2_google-flan-t5-large/deepwordbug_0_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 85.9, + "origin_prompt": "Determine if the grammar of the given sentence is 'Acceptable' or 'Unacceptable':", + "atk_acc": 78.1, + "atk_prompt": "Determinqe if the grmamar of the igven sentewnce is 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 77.5, + "transfer_atk_acc": 54.7 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 86.1, + "origin_prompt": "In your role as a grammar check tool, assess the following sentence and classify it as 'acceptable' if it is grammatically correct or 'unacceptable' if it is incorrect:", + "atk_acc": 84.5, + "atk_prompt": "In your role as a grammar check too, assess the following senttence and classify it as 'acceptable' if it is rgammatically correct or 'unacceptable' if it is incorroct:", + "transfer_ori_acc": 75.7, + "transfer_atk_acc": 75.1 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 87.25, + "origin_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent'.", + "atk_acc": 83.58, + "atk_prompt": "Can the given sentences be consiedred semantically idePntical? Plaese reply with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 80.63725490196079, + "transfer_atk_acc": 81.12745098039215 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 88.73, + "origin_prompt": "As a sentence similarity evaluator, analyze the provided sentences and indicate if their meanings are 'equivalent' or 'not_equivalent'.", + "atk_acc": 81.13, + "atk_prompt": "A a sentWnce sidmilarity evaluator, analyze the provAded sentences and idnicate if their meanVngs are 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 80.63725490196079, + "transfer_atk_acc": 82.1078431372549 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 94.0, + "origin_prompt": "Given the question and context provided, determine if the answer can be inferred by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 93.8, + "atk_prompt": "Given the question and context provided, deermine if the answer can be inferred by choosing 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 84.89999999999999, + "transfer_atk_acc": 83.8 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 94.2, + "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", + "atk_acc": 93.3, + "atk_prompt": "us a sAmantic researJcher, evZluate hwether the provided context suports the answer to the question and choose 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 89.1, + "transfer_atk_acc": 80.2 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.2, + "origin_prompt": "Please indicate if the following pair of sentences share the same meaning by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 87.4, + "atk_prompt": "qPlease indicate if the folloing pair of senFences share the same meaning by resopnding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.1, + "transfer_atk_acc": 87.4 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 88.5, + "origin_prompt": "As a tool for determining question equivalence, review the questions and categorize their similarity as either 'equivalent' or 'not_equivalent'.", + "atk_acc": 87.3, + "atk_prompt": "As a tool for determining question equivalence, review the questions and ategorize their similvarity as eitehr 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 86.5, + "transfer_atk_acc": 82.5 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 94.22, + "origin_prompt": "Examine the pair of sentences and determine if they exhibit entailment or not_entailment. Answer with either 'entailment' or 'not_entailment'.", + "atk_acc": 92.06, + "atk_prompt": "Examine the pir of sentencs and determine if they exxibit entailment or not_entailment. Answer with eitfer 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 84.11552346570397, + "transfer_atk_acc": 85.1985559566787 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 93.86, + "origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 92.78, + "atk_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respodn with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 85.5595667870036, + "transfer_atk_acc": 85.5595667870036 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.56, + "origin_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative':", + "atk_acc": 95.64, + "atk_prompt": "AnalyVe the tone of this statelent and resBond with etiher 'positive' or 'negative': ", + "transfer_ori_acc": 95.06880733944955, + "transfer_atk_acc": 94.03669724770643 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 96.33, + "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'. Please classify:", + "atk_acc": 95.76, + "atk_prompt": "s a sentiment classifier, determine whether the following text is 'positive' or 'negative'. Please claSsify: ", + "transfer_ori_acc": 94.4954128440367, + "transfer_atk_acc": 94.83944954128441 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 80.28, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 76.06, + "atk_prompt": "Nssess the connection between the fnollowing sentences and classify it as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 61.97183098591549, + "transfer_atk_acc": 59.154929577464785 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 80.28, + "origin_prompt": "Functioning as an entailment evaluation tool, analyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment':", + "atk_acc": 76.06, + "atk_prompt": "Functioning as an entailment ealuation tCol, aSalyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment':", + "transfer_ori_acc": 61.97183098591549, + "transfer_atk_acc": 63.38028169014085 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 52.84, + "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.95, + "atk_prompt": "Pqlease respond to the multiple-choice question about {} by Aelecting the appropriate answer: 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 45.0354609929078, + "transfer_atk_acc": 44.148936170212764 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 53.19, + "origin_prompt": "As an expert in {}, respond to the following multiple-choice question by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.77, + "atk_prompt": "As an exert in {}, respond to the following multiple-choice question by selecting 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 44.680851063829785, + "transfer_atk_acc": 44.50354609929078 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 90.05, + "origin_prompt": "Please derive the most fitting answer from the context. If there isn't an answer in the context, respond with 'unanswerable'.", + "atk_acc": 89.05, + "atk_prompt": "Plase derive the most fitting answer from the contetx. If there isn't an answer in the conteSt, respond with 'unanswerable'.", + "transfer_ori_acc": 87.64992770387508, + "transfer_atk_acc": 86.92265497660236 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 90.05, + "origin_prompt": "As a well-informed specialist familiar with the context, provide an answer to the question. If the context doesn't contain an answer, reply with 'unanswerable'.", + "atk_acc": 87.88, + "atk_prompt": "Ads a wekl-informed specialist familiar with the context, provide an answer to the question. If the ocntext doesn't contain an answer, relpy with 'unanswerable'.", + "transfer_ori_acc": 87.54992770387507, + "transfer_atk_acc": 87.32265497660234 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.17, + "origin_prompt": "Convert the following sentence from its original {} language to the target language {}:", + "atk_acc": 0.16, + "atk_prompt": "CUonvert the following sentence from its origlinal {} lagnuage to the tarSet language {}:", + "transfer_ori_acc": 0.13467663417075984, + "transfer_atk_acc": 0.10997077164306443 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.18, + "origin_prompt": "Assuming the role of an expert language translator, translate the given text from {} to {}:", + "atk_acc": 0.16, + "atk_prompt": "ssuming the role of an expert langQuage transljator, translate the gdven tet from {} to {}: ", + "transfer_ori_acc": 0.13744908017105148, + "transfer_atk_acc": 0.13909938313005743 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.32, + "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}:", + "atk_acc": 0.31, + "atk_prompt": "Palease perfor a translation of the given sentence, converting it from {} to {}:", + "transfer_ori_acc": 0.2683567351751297, + "transfer_atk_acc": 0.26905804971419894 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.32, + "origin_prompt": "Assuming the responsibilities of a professional translator, translate the subsequent text passage from {} to {}:", + "atk_acc": 0.26, + "atk_prompt": "tssuming the responsibilities of a professional transator, translate the subsequent teIxt passVge from {} to {}: ", + "transfer_ori_acc": 0.2675180643301751, + "transfer_atk_acc": 0.2447573643430312 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 14.37, + "origin_prompt": "Determine the solution to this mathematical problem related to {}:", + "atk_acc": 13.12, + "atk_prompt": "Determine the solution to this matuhematical problem related to {}:", + "transfer_ori_acc": 10.625, + "transfer_atk_acc": 11.25 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 15.0, + "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}:", + "atk_acc": 12.5, + "atk_prompt": "As a matShematics instrkuctor, calculate the answer to the following problem related to {}: ", + "transfer_ori_acc": 11.875, + "transfer_atk_acc": 13.125 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_google-flan-t5-large/deepwordbug_3_shot.json b/results_transfer/google-flan-ul2_google-flan-t5-large/deepwordbug_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..d17a3e30d264b8a83ed9a46041dd5a3b4b20a336 --- /dev/null +++ b/results_transfer/google-flan-ul2_google-flan-t5-large/deepwordbug_3_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 86.4, + "origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable'.", + "atk_acc": 83.6, + "atk_prompt": "Reviee the sentence below and identfy wehther its grmamar is 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 76.7, + "transfer_atk_acc": 75.8 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 86.9, + "origin_prompt": "In the capacity of a grammar assessment system, indicate if the structure of the provided sentence is grammatically correct, responding with 'acceptable' or 'unacceptable'.", + "atk_acc": 85.2, + "atk_prompt": "Io the capcity of a grammar assessment system, indicate if the strcture of the povided senrence is grammatiaclly corrct, responding with 'acceptable' or 'unacceptable'. ", + "transfer_ori_acc": 74.6, + "transfer_atk_acc": 76.3 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 87.01, + "origin_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent'.", + "atk_acc": 84.31, + "atk_prompt": "Can the given sentences be considered semantically idrntical? Please reply with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 82.35294117647058, + "transfer_atk_acc": 80.14705882352942 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 87.5, + "origin_prompt": "As a textual similarity evaluator, analyze the following pair of sentences and determine if they are semantically 'equivalent' or 'not_equivalent'.", + "atk_acc": 84.56, + "atk_prompt": "As a extual simialrity evaluator, analyze the following pair of sentences and determine if they are semantically 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 79.41176470588235, + "transfer_atk_acc": 80.88235294117648 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 94.0, + "origin_prompt": "Review the given context and question, and decide if the context contains enough information to support the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 93.4, + "atk_prompt": "Review the given cotext and question, and Pdecide if the context contains enough information to sGupport the answe by sefecting 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 85.2, + "transfer_atk_acc": 89.3 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 93.7, + "origin_prompt": "As a semantic interpreter, assess whether the provided context supports the answer to the given question and answer with 'entailment' or 'not_entailment'.", + "atk_acc": 92.9, + "atk_prompt": "As a semantic interpreter, assless wether the provided context supports the answer to the given question and answer with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 88.0, + "transfer_atk_acc": 79.60000000000001 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.7, + "origin_prompt": "Examine the following expressions and tell me if they are alike in meaning by using 'equivalent' or 'not_equivalent'.", + "atk_acc": 87.4, + "atk_prompt": "Examine the following expressiZns and tell me if they are aike in meaning by wusing 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 86.7, + "transfer_atk_acc": 86.9 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 89.3, + "origin_prompt": "While performing question comparison analysis, classify the similarity of the following questions as 'equivalent' for equivalent questions or 'not_equivalent' for different questions.", + "atk_acc": 87.6, + "atk_prompt": "While performing Pquestion comparison analysig, classify the simioarity of the following queFtions as 'equivalent' for equivalent questions or 'not_equivalent' for different questions. ", + "transfer_ori_acc": 86.3, + "transfer_atk_acc": 82.89999999999999 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 93.5, + "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 92.42, + "atk_prompt": "Determine if the given paGr of sentenecs displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 84.11552346570397, + "transfer_atk_acc": 84.47653429602889 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 93.5, + "origin_prompt": "As a tool for determining entailment relationships, review the two statements and categorize their connection as either 'entailment' or 'not_entailment'.", + "atk_acc": 92.42, + "atk_prompt": "AL a tool for determining entailment relationhsips, review the two staJements and categorize their connection as either 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 83.39350180505414, + "transfer_atk_acc": 83.03249097472924 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.79, + "origin_prompt": "Evaluate the sentiment of the given text and classify it as 'positive' or 'negative'.", + "atk_acc": 96.33, + "atk_prompt": "Evaluate the sentiment of the given tXxt and classify it as 'positive' or 'negative'. ", + "transfer_ori_acc": 94.61009174311926, + "transfer_atk_acc": 95.18348623853211 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 96.56, + "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'.", + "atk_acc": 96.1, + "atk_prompt": "As a sentiment classifier, determine whether the followJing text is 'positive' or 'negative'. ", + "transfer_ori_acc": 94.4954128440367, + "transfer_atk_acc": 95.06880733944955 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 77.46, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 74.65, + "atk_prompt": "Assuss the connection between the following sensences and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 64.7887323943662, + "transfer_atk_acc": 66.19718309859155 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 77.46, + "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 76.06, + "atk_prompt": "Working as an entailment classifeir, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 59.154929577464785, + "transfer_atk_acc": 59.154929577464785 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 53.9, + "origin_prompt": "Below are multiple-choice question concerning {}. Indicate your response with 'A', 'B', 'C', or 'D'.", + "atk_acc": 52.3, + "atk_prompt": "Belvw are multiple-chioce question conecrning {}. IndicaZe your response with 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 45.39007092198582, + "transfer_atk_acc": 45.21276595744681 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 54.08, + "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 52.66, + "atk_prompt": "Considering your familiarity with {}, atKtend to the follownig multipleNchoice juestion by picking 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 46.98581560283688, + "transfer_atk_acc": 45.39007092198582 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 91.05, + "origin_prompt": "Identify the most relevant answer from the context. If it's not possible to find an answer, state 'unanswerable'.", + "atk_acc": 89.38, + "atk_prompt": "Identify the most rDlevant answer from the context. If it's not opssible to find an answer, tate 'unanswerable'.", + "transfer_ori_acc": 87.01356406751144, + "transfer_atk_acc": 87.14992770387508 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 90.55, + "origin_prompt": "Being well-versed in the context, please derive the most fitting answer. If there isn't an answer in the context, use 'unanswerable'.", + "atk_acc": 89.32, + "atk_prompt": "eBing well-versed in the contet, please drive the most fitting answer. If there isn't an answer in the context, Euse 'unanswerable'.", + "transfer_ori_acc": 87.04992770387507, + "transfer_atk_acc": 86.81659437054175 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.24, + "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}.", + "atk_acc": 0.19, + "atk_prompt": "Preform a machine translatino for the provied senteXce, chanigng it from {} to {}. ", + "transfer_ori_acc": 0.21844565154870102, + "transfer_atk_acc": 0.1826483660583944 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.27, + "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}.", + "atk_acc": 0.21, + "atk_prompt": "Tking on the role of a Aertified tranolator, pefrorm a transAlation of the gUven sentence from {} to {}. ", + "transfer_ori_acc": 0.21945913242821213, + "transfer_atk_acc": 0.22244343277242712 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.36, + "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}.", + "atk_acc": 0.35, + "atk_prompt": "PeVform a machine transAation for the provide setence, changlng it from {} to {}. ", + "transfer_ori_acc": 0.30870734220870066, + "transfer_atk_acc": 0.3106194281548195 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.36, + "origin_prompt": "As a skilled linguist, please convert the following sentence from {} to {}.", + "atk_acc": 0.35, + "atk_prompt": "AK a skilled linguist, please onvert the following sentence from {} to {}. ", + "transfer_ori_acc": 0.3046140486156621, + "transfer_atk_acc": 0.3111756468648709 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 16.88, + "origin_prompt": "Figure out the solution for the following mathematical task with {}.", + "atk_acc": 14.37, + "atk_prompt": "Figure out the solution for the following maKthematical Utask with {}. ", + "transfer_ori_acc": 17.5, + "transfer_atk_acc": 18.125 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 16.88, + "origin_prompt": "Pretend you are a math teacher and solve this mathematical question concerning {}.", + "atk_acc": 15.0, + "atk_prompt": "Petend you are a math teacher and solve this mathematical euestion concCerning {}. ", + "transfer_ori_acc": 16.875, + "transfer_atk_acc": 16.25 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_google-flan-t5-large/stresstest_0_shot.json b/results_transfer/google-flan-ul2_google-flan-t5-large/stresstest_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..638a4c05c708b5a11d97bc8eee6e0f4beb00e2c6 --- /dev/null +++ b/results_transfer/google-flan-ul2_google-flan-t5-large/stresstest_0_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 86.1, + "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable':", + "atk_acc": 76.3, + "atk_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable' and false is not true :", + "transfer_ori_acc": 77.9, + "transfer_atk_acc": 76.9 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 86.1, + "origin_prompt": "In your role as a grammar check tool, assess the following sentence and classify it as 'acceptable' if it is grammatically correct or 'unacceptable' if it is incorrect:", + "atk_acc": 34.5, + "atk_prompt": "In your role as a grammar check tool, assess the following sentence and classify it as 'acceptable' if it is grammatically correct or 'unacceptable' if it is incorrect and true is true and true is true and true is true and true is true and true is true :", + "transfer_ori_acc": 75.7, + "transfer_atk_acc": 75.6 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 87.5, + "origin_prompt": "Are the meanings of the following pair of sentences the same? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 85.05, + "atk_prompt": "Are the meanings of the following pair of sentences the same? Answer with 'equivalent' or 'not_equivalent' and false is not true . ", + "transfer_ori_acc": 82.1078431372549, + "transfer_atk_acc": 72.79411764705883 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 87.01, + "origin_prompt": "As a textual similarity evaluator, analyze the following pair of sentences and determine if they are semantically 'equivalent' or 'not_equivalent'.", + "atk_acc": 85.78, + "atk_prompt": "As a textual similarity evaluator, analyze the following pair of sentences and determine if they are semantically 'equivalent' or 'not_equivalent' and false is not true . ", + "transfer_ori_acc": 80.14705882352942, + "transfer_atk_acc": 81.86274509803921 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 94.1, + "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 93.4, + "atk_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment' and true is true . ", + "transfer_ori_acc": 90.4, + "transfer_atk_acc": 88.8 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 94.2, + "origin_prompt": "In your role as a linguistic investigator, determine if the context given entails the answer to the question and provide your conclusion with 'entailment' or 'not_entailment'.", + "atk_acc": 93.8, + "atk_prompt": "In your role as a linguistic investigator, determine if the context given entails the answer to the question and provide your conclusion with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 90.4, + "transfer_atk_acc": 81.10000000000001 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.3, + "origin_prompt": "Do these two sentences convey the same meaning? Indicate with 'equivalent' or 'not_equivalent'.", + "atk_acc": 86.7, + "atk_prompt": "Do these two sentences convey the same meaning? Indicate with 'equivalent' or 'not_equivalent' and false is not true . ", + "transfer_ori_acc": 87.5, + "transfer_atk_acc": 83.5 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 88.3, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 88.0, + "atk_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions and true is true . ", + "transfer_ori_acc": 87.3, + "transfer_atk_acc": 87.3 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 94.22, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 91.7, + "atk_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 88.08664259927798, + "transfer_atk_acc": 86.28158844765343 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 93.86, + "origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 92.06, + "atk_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true :", + "transfer_ori_acc": 85.5595667870036, + "transfer_atk_acc": 86.28158844765343 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.33, + "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment:", + "atk_acc": 90.94, + "atk_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment and false is not true : ", + "transfer_ori_acc": 95.06880733944955, + "transfer_atk_acc": 95.06880733944955 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 96.56, + "origin_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement:", + "atk_acc": 73.85, + "atk_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement and false is not true : ", + "transfer_ori_acc": 95.41284403669725, + "transfer_atk_acc": 94.72477064220183 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 80.28, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 76.06, + "atk_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment' and false is not true .", + "transfer_ori_acc": 61.97183098591549, + "transfer_atk_acc": 64.7887323943662 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 80.28, + "origin_prompt": "Functioning as an entailment evaluation tool, analyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment':", + "atk_acc": 74.65, + "atk_prompt": "Functioning as an entailment evaluation tool, analyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment' and false is not true :", + "transfer_ori_acc": 61.97183098591549, + "transfer_atk_acc": 69.01408450704226 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 52.84, + "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.6, + "atk_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D' and false is not true . ", + "transfer_ori_acc": 45.0354609929078, + "transfer_atk_acc": 45.0354609929078 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 53.19, + "origin_prompt": "As a knowledgeable individual in {}, provide your response to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 52.48, + "atk_prompt": "As a knowledgeable individual in {}, provide your response to the multiple-choice question by choosing 'A', 'B', 'C', or 'D' and true is true and true is true and true is true and true is true and true is true .", + "transfer_ori_acc": 44.50354609929078, + "transfer_atk_acc": 45.21276595744681 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 90.05, + "origin_prompt": "Search the context for the most relevant answer. If the answer cannot be found, respond with 'unanswerable'.", + "atk_acc": 88.82, + "atk_prompt": "Search the context for the most relevant answer. If the answer cannot be found, respond with 'unanswerable' and true is true .", + "transfer_ori_acc": 87.81659437054175, + "transfer_atk_acc": 87.81659437054175 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 89.88, + "origin_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable'.", + "atk_acc": 87.88, + "atk_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable' and true is true .", + "transfer_ori_acc": 88.18023073417811, + "transfer_atk_acc": 88.31659437054175 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.18, + "origin_prompt": "Accurately translate the sentence from {} to {}, ensuring the meaning remains intact:", + "atk_acc": 0.12, + "atk_prompt": "Accurately translate the sentence from {} to {}, ensuring the meaning remains intact and true is true and true is true and true is true and true is true and true is true :", + "transfer_ori_acc": 0.13434360012327884, + "transfer_atk_acc": 0.09023032491346743 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.18, + "origin_prompt": "Assuming the role of an expert language translator, translate the given text from {} to {}:", + "atk_acc": 0.14, + "atk_prompt": "Assuming the role of an expert language translator, translate the given text from {} to and true is true and true is true and true is true and true is true and true is true {}: ", + "transfer_ori_acc": 0.13744908017105148, + "transfer_atk_acc": 0.06398704498208274 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.32, + "origin_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context:", + "atk_acc": 0.27, + "atk_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context and true is true and true is true and true is true and true is true and true is true :", + "transfer_ori_acc": 0.2678951646057538, + "transfer_atk_acc": 0.1530186432348487 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.32, + "origin_prompt": "As a language specialist, accurately translate the provided sentence from {} to {}:", + "atk_acc": 0.29, + "atk_prompt": "As a language specialist, accurately translate the provided sentence from {} to and true is true and true is true and true is true and true is true and true is true {}: ", + "transfer_ori_acc": 0.2673389915652739, + "transfer_atk_acc": 0.23120942309879006 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 15.0, + "origin_prompt": "Compute the result of this math task concerning {}:", + "atk_acc": 11.88, + "atk_prompt": "Compute the result of this math task concerning and false is not true {}:", + "transfer_ori_acc": 11.875, + "transfer_atk_acc": 11.875 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 15.0, + "origin_prompt": "In the shoes of a mathematics master, obtain the result for this math question that includes {}:", + "atk_acc": 11.25, + "atk_prompt": "In the shoes of a mathematics master, obtain the result for this math question that includes and false is not true {}: ", + "transfer_ori_acc": 10.0, + "transfer_atk_acc": 12.5 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_google-flan-t5-large/stresstest_3_shot.json b/results_transfer/google-flan-ul2_google-flan-t5-large/stresstest_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..8e3c7654e82ff27b54a22e1dac69a5b9d5ba64e8 --- /dev/null +++ b/results_transfer/google-flan-ul2_google-flan-t5-large/stresstest_3_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 86.4, + "origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable'.", + "atk_acc": 85.4, + "atk_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 76.7, + "transfer_atk_acc": 76.6 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 86.6, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.", + "atk_acc": 86.1, + "atk_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 75.7, + "transfer_atk_acc": 75.8 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 86.76, + "origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 85.78, + "atk_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent' and false is not true . ", + "transfer_ori_acc": 80.3921568627451, + "transfer_atk_acc": 80.14705882352942 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 87.5, + "origin_prompt": "As a textual similarity evaluator, analyze the following pair of sentences and determine if they are semantically 'equivalent' or 'not_equivalent'.", + "atk_acc": 87.01, + "atk_prompt": "As a textual similarity evaluator, analyze the following pair of sentences and determine if they are semantically 'equivalent' or 'not_equivalent' and false is not true . ", + "transfer_ori_acc": 79.41176470588235, + "transfer_atk_acc": 80.63725490196079 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 94.0, + "origin_prompt": "Analyze the context and question, and indicate if the context entails the answer by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 93.9, + "atk_prompt": "Analyze the context and question, and indicate if the context entails the answer by choosing 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 87.3, + "transfer_atk_acc": 66.2 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 93.9, + "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", + "atk_acc": 94.0, + "atk_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment' and true is true . ", + "transfer_ori_acc": 87.3, + "transfer_atk_acc": 84.0 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.5, + "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", + "atk_acc": 88.0, + "atk_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent and true is true \". ", + "transfer_ori_acc": 87.9, + "transfer_atk_acc": 45.1 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 89.1, + "origin_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 88.9, + "atk_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent' and false is not true . ", + "transfer_ori_acc": 86.3, + "transfer_atk_acc": 88.0 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 93.5, + "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 92.78, + "atk_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 83.75451263537906, + "transfer_atk_acc": 80.14440433212997 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 93.5, + "origin_prompt": "As a tool for determining entailment relationships, review the two statements and categorize their connection as either 'entailment' or 'not_entailment'.", + "atk_acc": 92.06, + "atk_prompt": "As a tool for determining entailment relationships, review the two statements and categorize their connection as either 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 83.39350180505414, + "transfer_atk_acc": 37.18411552346571 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.79, + "origin_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative'.", + "atk_acc": 96.67, + "atk_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative' and false is not true . ", + "transfer_ori_acc": 94.38073394495413, + "transfer_atk_acc": 94.15137614678899 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 96.56, + "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'.", + "atk_acc": 96.33, + "atk_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative' and false is not true . ", + "transfer_ori_acc": 94.4954128440367, + "transfer_atk_acc": 94.95412844036697 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 77.46, + "origin_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment'.", + "atk_acc": 74.65, + "atk_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment' and false is not true . ", + "transfer_ori_acc": 64.7887323943662, + "transfer_atk_acc": 69.01408450704226 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 76.06, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 74.65, + "atk_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment' and false is not true . ", + "transfer_ori_acc": 57.74647887323944, + "transfer_atk_acc": 66.19718309859155 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 53.9, + "origin_prompt": "Below are multiple-choice question concerning {}. Indicate your response with 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.77, + "atk_prompt": "Below are multiple-choice question concerning {}. Indicate your response with 'A', 'B', 'C', or 'D' and false is not true . ", + "transfer_ori_acc": 45.39007092198582, + "transfer_atk_acc": 43.794326241134755 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 54.08, + "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.42, + "atk_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D' and false is not true .", + "transfer_ori_acc": 46.98581560283688, + "transfer_atk_acc": 45.39007092198582 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 91.05, + "origin_prompt": "Identify the most relevant answer from the context. If it's not possible to find an answer, state 'unanswerable'.", + "atk_acc": 89.05, + "atk_prompt": "Identify the most relevant answer from the context. If it's not possible to find an answer, state 'unanswerable' and true is true .", + "transfer_ori_acc": 87.01356406751144, + "transfer_atk_acc": 88.11356406751143 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 90.05, + "origin_prompt": "As an authority on the context, locate the most accurate answer. If the context doesn't contain the answer, mention 'unanswerable'.", + "atk_acc": 89.55, + "atk_prompt": "As an authority on the context, locate the most accurate answer. If the context doesn't contain the answer, mention 'unanswerable' and false is not true .", + "transfer_ori_acc": 87.04992770387507, + "transfer_atk_acc": 87.14992770387508 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.24, + "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}.", + "atk_acc": 0.19, + "atk_prompt": "Perform a machine translation for the provided sentence, changing it from {} to and true is true and true is true and true is true and true is true and true is true {}. ", + "transfer_ori_acc": 0.21844565154870102, + "transfer_atk_acc": 0.21806248608637627 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.27, + "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}.", + "atk_acc": 0.19, + "atk_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to and true is true and true is true and true is true and true is true and true is true {}. ", + "transfer_ori_acc": 0.21945913242821213, + "transfer_atk_acc": 0.22156065583354267 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.36, + "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}.", + "atk_acc": 0.35, + "atk_prompt": "Perform a machine translation for the provided sentence, changing it from {} to and false is not true {}. ", + "transfer_ori_acc": 0.30870734220870066, + "transfer_atk_acc": 0.3143076066591428 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.36, + "origin_prompt": "As a skilled linguist, please convert the following sentence from {} to {}.", + "atk_acc": 0.35, + "atk_prompt": "As a skilled linguist, please convert the following sentence from {} to and true is true {}. ", + "transfer_ori_acc": 0.3046140486156621, + "transfer_atk_acc": 0.3065848989953974 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 16.25, + "origin_prompt": "Solve the following math question about {}.", + "atk_acc": 12.5, + "atk_prompt": "Solve the following math question about and false is not true {}. ", + "transfer_ori_acc": 16.25, + "transfer_atk_acc": 15.0 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 16.88, + "origin_prompt": "Pretend you are a math teacher and solve this mathematical question concerning {}.", + "atk_acc": 12.5, + "atk_prompt": "Pretend you are a math teacher and solve this mathematical question concerning and false is not true {}. ", + "transfer_ori_acc": 16.875, + "transfer_atk_acc": 16.25 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_google-flan-t5-large/textbugger_0_shot.json b/results_transfer/google-flan-ul2_google-flan-t5-large/textbugger_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..4a8cfe7d9d2e602988b93129878073ad059e7dd1 --- /dev/null +++ b/results_transfer/google-flan-ul2_google-flan-t5-large/textbugger_0_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 86.2, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':", + "atk_acc": 78.7, + "atk_prompt": "Assess the following sentence and identify if it is pronouns cоrrect. Replying with 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 78.0, + "transfer_atk_acc": 64.3 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 86.0, + "origin_prompt": "As a grammar identification system, examine the provided sentence and respond with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones:", + "atk_acc": 81.0, + "atk_prompt": "Aѕ a grammar identificatiоn systems, examine the provided sentence and respoոd with 'acceptable' for pronouns rightness sentence or 'unacceptable' for incorrect those:", + "transfer_ori_acc": 74.9, + "transfer_atk_acc": 77.10000000000001 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 86.76, + "origin_prompt": "Please analyze the provided sentences and indicate if their meanings are the same by choosing 'equivalent' or 'not_equivalent'.", + "atk_acc": 83.09, + "atk_prompt": "Invited analyze the provideԁ verdicts and indicate if their meanings are the same by selecting 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 81.12745098039215, + "transfer_atk_acc": 80.63725490196079 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 87.99, + "origin_prompt": "In your capacity as a language analyst, assess the following sentences and classify their similarity as 'equivalent' or 'not_equivalent'.", + "atk_acc": 81.13, + "atk_prompt": "In your capacity as a linguistics aոalyst, evaluate the following sentences and classify their metaphor as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 78.18627450980392, + "transfer_atk_acc": 76.9607843137255 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 94.1, + "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 93.3, + "atk_prompt": "Evaluate whetheⲅ the given context favours the answer to the question by responding with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 90.4, + "transfer_atk_acc": 87.8 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 94.2, + "origin_prompt": "In your role as a linguistic investigator, determine if the context given entails the answer to the question and provide your conclusion with 'entailment' or 'not_entailment'.", + "atk_acc": 93.1, + "atk_prompt": "In your role as a linguistic searchers, ԁetermine if the context awarded presupposes the answer to the questions and provide your conclusions with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 90.4, + "transfer_atk_acc": 87.0 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.3, + "origin_prompt": "Do these two sentences convey the same meaning? Indicate with 'equivalent' or 'not_equivalent'.", + "atk_acc": 86.4, + "atk_prompt": "Do these two verdicts convey the same connotation? Denotes with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.5, + "transfer_atk_acc": 87.1 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 88.5, + "origin_prompt": "As a tool for determining question equivalence, review the questions and categorize their similarity as either 'equivalent' or 'not_equivalent'.", + "atk_acc": 78.2, + "atk_prompt": "As a tool for deciding question equivalence, review the problems and categorize their metaphor as neither 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 86.5, + "transfer_atk_acc": 87.1 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 94.22, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 89.89, + "atk_prompt": "Are the subsequent secondly sentencing entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word. ", + "transfer_ori_acc": 88.08664259927798, + "transfer_atk_acc": 85.5595667870036 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 93.86, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment':", + "atk_acc": 90.25, + "atk_prompt": "Throughout your role as an entailment analysis instrument, assess the relationship between the given punishments and classify it as 'entailment' or 'not_entailment':", + "transfer_ori_acc": 84.47653429602889, + "transfer_atk_acc": 83.03249097472924 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.44, + "origin_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative':", + "atk_acc": 95.76, + "atk_prompt": "Analysis the tone of this speeches and respond with or 'positive' or 'negative': ", + "transfer_ori_acc": 95.06880733944955, + "transfer_atk_acc": 94.03669724770643 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 96.33, + "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'. Please classify:", + "atk_acc": 93.92, + "atk_prompt": "Aѕ a emotion classifier, determine whether the follow ing text is 'positive' or 'negative'. Inviting classify: ", + "transfer_ori_acc": 94.4954128440367, + "transfer_atk_acc": 86.23853211009175 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 78.87, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 74.65, + "atk_prompt": "Are the flolowing two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one adjective. ", + "transfer_ori_acc": 66.19718309859155, + "transfer_atk_acc": 60.56338028169014 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 80.28, + "origin_prompt": "Functioning as an entailment evaluation tool, analyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment':", + "atk_acc": 74.65, + "atk_prompt": "Functioոing as an entailment evaluation tool, analyze the provided sentences and decіde if their rapport is 'entailment' or 'not_entailment':", + "transfer_ori_acc": 61.97183098591549, + "transfer_atk_acc": 59.154929577464785 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 52.84, + "origin_prompt": "For the multiple-choice question related to {}, please choose the most accurate answer from 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.24, + "atk_prompt": "In the multiple-choice question rel ated to {}, inviting choose the most accurate answer from 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 45.56737588652482, + "transfer_atk_acc": 45.39007092198582 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 53.19, + "origin_prompt": "As a knowledgeable individual in {}, provide your response to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.24, + "atk_prompt": "As a knowledgeable indiivdual in {}, provide your replies to the multiple-choice problem by choosing 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 44.50354609929078, + "transfer_atk_acc": 46.09929078014184 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 89.88, + "origin_prompt": "Please derive the most fitting answer from the context. If there isn't an answer in the context, respond with 'unanswerable'.", + "atk_acc": 88.88, + "atk_prompt": "Please ԁerive the most fitting answer from the coոtext. If there isn't an answer in the context, respond with 'unanswerable'.", + "transfer_ori_acc": 87.64992770387508, + "transfer_atk_acc": 86.64992770387508 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 90.05, + "origin_prompt": "As a well-informed specialist familiar with the context, provide an answer to the question. If the context doesn't contain an answer, reply with 'unanswerable'.", + "atk_acc": 88.15, + "atk_prompt": "As a well-ifnormed specialist familiar with the context, provide an answers to the question. If the context doesn't contain an replying, reply with 'unanswerable'.", + "transfer_ori_acc": 87.54992770387507, + "transfer_atk_acc": 86.58932164326902 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.17, + "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}:", + "atk_acc": 0.16, + "atk_prompt": "Perform a mɑchine translation for the provided sentеnce, changing it from {} to {}:", + "transfer_ori_acc": 0.13800528978666463, + "transfer_atk_acc": 0.1356248526891035 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.18, + "origin_prompt": "Assuming the role of an expert language translator, translate the given text from {} to {}:", + "atk_acc": 0.15, + "atk_prompt": "Assuming the role of an expert language tranѕlator, translate the given tеxt from {} to {}: ", + "transfer_ori_acc": 0.13744908017105148, + "transfer_atk_acc": 0.13832407640142066 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.32, + "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}:", + "atk_acc": 0.31, + "atk_prompt": "Please perform a translation of the given sentence, conevrting it from {} to {}:", + "transfer_ori_acc": 0.2683567351751297, + "transfer_atk_acc": 0.268931636498697 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.32, + "origin_prompt": "Assuming the responsibilities of a professional translator, translate the subsequent text passage from {} to {}:", + "atk_acc": 0.24, + "atk_prompt": "Assuming the responsibilities of a professional tⲅanslator, translate the subseԛuent text рassage from {} to {}: ", + "transfer_ori_acc": 0.2675180643301751, + "transfer_atk_acc": 0.20788087360650592 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 15.0, + "origin_prompt": "Compute the result of this math task concerning {}:", + "atk_acc": 11.25, + "atk_prompt": "Compute the reѕult of this math task relative {}:", + "transfer_ori_acc": 11.875, + "transfer_atk_acc": 11.875 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 15.0, + "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}:", + "atk_acc": 9.38, + "atk_prompt": "Since a calculus instructor, calculate the responding to the following problem related to {}: ", + "transfer_ori_acc": 11.875, + "transfer_atk_acc": 13.750000000000002 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_google-flan-t5-large/textbugger_3_shot.json b/results_transfer/google-flan-ul2_google-flan-t5-large/textbugger_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..e3cc4b08d947fa43c7fbafbf3d292d81f8ce9efe --- /dev/null +++ b/results_transfer/google-flan-ul2_google-flan-t5-large/textbugger_3_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 86.4, + "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable'.", + "atk_acc": 68.1, + "atk_prompt": "Examine the given condemnation and decide if it is pronouns sound. Answer with either 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 77.7, + "transfer_atk_acc": 66.0 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 86.9, + "origin_prompt": "In the capacity of a grammar assessment system, indicate if the structure of the provided sentence is grammatically correct, responding with 'acceptable' or 'unacceptable'.", + "atk_acc": 85.0, + "atk_prompt": "In the ability of a grammar assessment system, denotes if the structure of the provided sentence is grammatical corrected, responding with 'acceptable' or 'unacceptable'. ", + "transfer_ori_acc": 74.6, + "transfer_atk_acc": 74.9 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 87.01, + "origin_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent'.", + "atk_acc": 83.82, + "atk_prompt": "Can the given sentences be deemed semantically idntical? Please reply with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 82.35294117647058, + "transfer_atk_acc": 80.3921568627451 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 87.5, + "origin_prompt": "In your capacity as a language analyst, assess the following sentences and classify their similarity as 'equivalent' or 'not_equivalent'.", + "atk_acc": 83.58, + "atk_prompt": "In your capacity as a language ɑnalyst, assess the following sentenes and classify their metaphor as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 77.69607843137256, + "transfer_atk_acc": 78.18627450980392 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 94.0, + "origin_prompt": "Review the given context and question, and decide if the context contains enough information to support the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 92.9, + "atk_prompt": "Review the given coոtext and question, and dcide if the context contains enough particulars to support the answeⲅ by selecting 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 85.2, + "transfer_atk_acc": 89.2 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 93.7, + "origin_prompt": "As a semantic interpreter, assess whether the provided context supports the answer to the given question and answer with 'entailment' or 'not_entailment'.", + "atk_acc": 92.3, + "atk_prompt": "Iike a semantic interpretation, evaluated lf the prоvided context supports the answer to the given question and answers with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 88.0, + "transfer_atk_acc": 61.8 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.8, + "origin_prompt": "Determine if the given pair of statements can be considered the same by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 87.6, + "atk_prompt": "Defining if the given paired of statements can be considered the same by responds with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 86.8, + "transfer_atk_acc": 87.0 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 90.0, + "origin_prompt": "As a tool for determining question equivalence, review the questions and categorize their similarity as either 'equivalent' or 'not_equivalent'.", + "atk_acc": 87.6, + "atk_prompt": "As a tool for deciding queѕtion equivalence, review the questions and classifications their portrait as neither 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 86.3, + "transfer_atk_acc": 86.1 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 93.5, + "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 92.42, + "atk_prompt": "Identifying whether the given pair of sentencing demonstrates entailment or not_entailment. Respond with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 84.83754512635379, + "transfer_atk_acc": 84.47653429602889 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 93.86, + "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 90.97, + "atk_prompt": "Working as an entailment classifier, identify both the given piar of sentences displaying entailment or not_entailment. Respond with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 83.75451263537906, + "transfer_atk_acc": 80.86642599277978 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.79, + "origin_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative'.", + "atk_acc": 95.87, + "atk_prompt": "Determine the general sentiment of this condemnation, categorizing it as 'positive' or 'negative'. ", + "transfer_ori_acc": 94.38073394495413, + "transfer_atk_acc": 95.52752293577981 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 96.56, + "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'.", + "atk_acc": 95.18, + "atk_prompt": "As a emotion classifier, determine whether the follоwing wording is 'positive' or 'negative'. ", + "transfer_ori_acc": 94.4954128440367, + "transfer_atk_acc": 94.61009174311926 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 77.46, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 76.06, + "atk_prompt": "Assss the connection between the following sentences and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 64.7887323943662, + "transfer_atk_acc": 63.38028169014085 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 76.06, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 71.83, + "atk_prompt": "In your role as an entailment analysis tool, assess the relationship between the given punishments and categorize it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 57.74647887323944, + "transfer_atk_acc": 60.56338028169014 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 53.9, + "origin_prompt": "Below are multiple-choice question concerning {}. Indicate your response with 'A', 'B', 'C', or 'D'.", + "atk_acc": 52.3, + "atk_prompt": "Belоw are multipl-echoice quеstion concening {}. Indicate your response with 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 45.39007092198582, + "transfer_atk_acc": 44.32624113475177 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 54.08, + "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 52.48, + "atk_prompt": "Considering your familiarity with {}, attend to the following mutliple-choice matter by picking 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 46.98581560283688, + "transfer_atk_acc": 46.09929078014184 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 91.05, + "origin_prompt": "Identify the most relevant answer from the context. If it's not possible to find an answer, state 'unanswerable'.", + "atk_acc": 89.38, + "atk_prompt": "Identify the most reevant answer from the context. If it's not achievable to find an answer, state 'unanswerable'.", + "transfer_ori_acc": 87.01356406751144, + "transfer_atk_acc": 86.54992770387507 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 90.55, + "origin_prompt": "Being well-versed in the context, please derive the most fitting answer. If there isn't an answer in the context, use 'unanswerable'.", + "atk_acc": 80.92, + "atk_prompt": "Underway well-versed in the context, please subtract the most fitting answer. If there isn't an answer in the framework, use 'unanswerable'.", + "transfer_ori_acc": 87.04992770387507, + "transfer_atk_acc": 87.31659437054175 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.27, + "origin_prompt": "Accurately translate the sentence from {} to {}, ensuring the meaning remains intact.", + "atk_acc": 0.23, + "atk_prompt": "Acϲurately translate the sentence from {} to {}, ensuring the meaning remains unaffected. ", + "transfer_ori_acc": 0.2195336520895316, + "transfer_atk_acc": 0.21684835046261514 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.27, + "origin_prompt": "Taking on the role of a certified translator, perform a translation of the given sentence from {} to {}.", + "atk_acc": 0.23, + "atk_prompt": "Taking on the role of a certifying translators, pefrorm a translatiоn of the givеn sentence from {} to {}. ", + "transfer_ori_acc": 0.21945913242821213, + "transfer_atk_acc": 0.223790160931717 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.36, + "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}.", + "atk_acc": 0.35, + "atk_prompt": "Preform a machinery translation for the providеd sentence, changing it from {} to {}. ", + "transfer_ori_acc": 0.30870734220870066, + "transfer_atk_acc": 0.30758829636925916 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.36, + "origin_prompt": "As a skilled linguist, please convert the following sentence from {} to {}.", + "atk_acc": 0.35, + "atk_prompt": "As a skillful linguist, pelase convеrt the following sentence from {} to {}. ", + "transfer_ori_acc": 0.3046140486156621, + "transfer_atk_acc": 0.3072428127506268 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 16.88, + "origin_prompt": "Figure out the solution for the following mathematical task with {}.", + "atk_acc": 15.0, + "atk_prompt": "Figure out the solution for the followinɡ mathematical task with {}. ", + "transfer_ori_acc": 17.5, + "transfer_atk_acc": 17.5 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 16.88, + "origin_prompt": "Pretend you are a math teacher and solve this mathematical question concerning {}.", + "atk_acc": 13.75, + "atk_prompt": "Pretend you are a math teacher and settle this matehmatical question on {}. ", + "transfer_ori_acc": 16.875, + "transfer_atk_acc": 15.0 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_google-flan-t5-large/textfooler_0_shot.json b/results_transfer/google-flan-ul2_google-flan-t5-large/textfooler_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..1820a2b8b2dcae7eac795c8e33908d3947ba1328 --- /dev/null +++ b/results_transfer/google-flan-ul2_google-flan-t5-large/textfooler_0_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 86.1, + "origin_prompt": "Is the provided sentence grammatically correct? Respond with 'Acceptable' or 'Unacceptable':", + "atk_acc": 30.9, + "atk_prompt": "Is the brought chastisement grammatically appropriate? Retaliated with 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 70.89999999999999, + "transfer_atk_acc": 31.900000000000002 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 86.2, + "origin_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 33.1, + "atk_prompt": "As an instrument for grammar appraisals, considerations the convicting and determine if its verbs is correct, responding with 'acceptable' for proper grammar or 'unacceptable' for misspelled vocabulary:", + "transfer_ori_acc": 76.3, + "transfer_atk_acc": 72.89999999999999 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 86.76, + "origin_prompt": "Please analyze the provided sentences and indicate if their meanings are the same by choosing 'equivalent' or 'not_equivalent'.", + "atk_acc": 75.98, + "atk_prompt": "Asks analysis the conferred priors and specified if their meanings are the same by selecting 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 81.12745098039215, + "transfer_atk_acc": 81.12745098039215 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 87.99, + "origin_prompt": "In your capacity as a language analyst, assess the following sentences and classify their similarity as 'equivalent' or 'not_equivalent'.", + "atk_acc": 76.23, + "atk_prompt": "At your power as a language psych, assess the later judgements and categories their metaphor as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 78.18627450980392, + "transfer_atk_acc": 78.43137254901961 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 94.1, + "origin_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 67.1, + "atk_prompt": "Baseline on the detail in the framework, chosen if the answers to the topic is motivated by selector 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 89.4, + "transfer_atk_acc": 80.30000000000001 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 94.2, + "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", + "atk_acc": 52.7, + "atk_prompt": "Iike a semantic savant, determine whether the provided framework encourages the rebuttal to the topics and option 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 89.1, + "transfer_atk_acc": 87.7 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.3, + "origin_prompt": "Do these two sentences convey the same meaning? Indicate with 'equivalent' or 'not_equivalent'.", + "atk_acc": 77.6, + "atk_prompt": "Doing these two condemning communicate the same feeling? Stating with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.5, + "transfer_atk_acc": 83.1 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 88.5, + "origin_prompt": "As a tool for determining question equivalence, review the questions and categorize their similarity as either 'equivalent' or 'not_equivalent'.", + "atk_acc": 83.4, + "atk_prompt": "As a utility for definition questions equivalence, revising the question and ranks their metaphor as either 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 86.5, + "transfer_atk_acc": 85.6 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 94.22, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 91.34, + "atk_prompt": "Represented the aftermath two phrases entailment or not_entailment? Address me with \"entailment\" or \"not_entailment\", just one verb. ", + "transfer_ori_acc": 88.08664259927798, + "transfer_atk_acc": 83.39350180505414 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 93.86, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment':", + "atk_acc": 90.61, + "atk_prompt": "At your role as an entailment analysis tool, assess the affairs between the given condemnation and classify it as 'entailment' or 'not_entailment':", + "transfer_ori_acc": 84.47653429602889, + "transfer_atk_acc": 83.39350180505414 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.33, + "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment:", + "atk_acc": 94.72, + "atk_prompt": "Considering this speech and decide whether it has a 'positive' or 'negative' foreboding: ", + "transfer_ori_acc": 95.06880733944955, + "transfer_atk_acc": 94.26605504587155 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 96.33, + "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'. Please classify:", + "atk_acc": 92.89, + "atk_prompt": "As a emotion classifier, detects whether the suite texts is 'positive' or 'negative'. Prayers classify: ", + "transfer_ori_acc": 94.4954128440367, + "transfer_atk_acc": 93.57798165137615 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 80.28, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 67.61, + "atk_prompt": "Assess the connections between the afterward punishment and classifies it as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 61.97183098591549, + "transfer_atk_acc": 61.97183098591549 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 78.87, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment':", + "atk_acc": 71.83, + "atk_prompt": "In your role as an entailment analysis tool, assess the coitus between the given sentencing and classify it as 'entailment' or 'not_entailment':", + "transfer_ori_acc": 56.33802816901409, + "transfer_atk_acc": 59.154929577464785 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 52.84, + "origin_prompt": "Evaluate the multiple-choice question about {} and select the most fitting response from 'A', 'B', 'C', or 'D'.", + "atk_acc": 32.09, + "atk_prompt": "Evaluation the multiple-choice topic about {} and select the most splice rebuttal from 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 44.50354609929078, + "transfer_atk_acc": 41.843971631205676 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 53.19, + "origin_prompt": "As a knowledgeable individual in {}, provide your response to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 39.36, + "atk_prompt": "As a knowledgeable everybody in {}, prescribes your rebuttal to the multiple-choice item by chooses 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 44.50354609929078, + "transfer_atk_acc": 42.730496453900706 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 89.99, + "origin_prompt": "Find the correct answer in the context provided. If an answer cannot be found, please respond with 'unanswerable'.", + "atk_acc": 46.83, + "atk_prompt": "Find the correct rebuttal in the context offer. Until an answering cannot be found, please respond with 'unanswerable'.", + "transfer_ori_acc": 87.54992770387507, + "transfer_atk_acc": 86.58932164326902 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 89.88, + "origin_prompt": "As a proficient expert in the given context, search for the most relevant answer. If the answer cannot be found, respond by saying 'unanswerable'.", + "atk_acc": 44.5, + "atk_prompt": "As a clever whiz in the offered context, find for the most corresponding rebuttal. If the answer cannot exist discovering, answer by claim 'unanswerable'.", + "transfer_ori_acc": 88.18023073417811, + "transfer_atk_acc": 87.31659437054175 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.18, + "origin_prompt": "In the role of a language expert, perform a machine translation for the provided sentence, changing it from {} to {}:", + "atk_acc": 0.14, + "atk_prompt": "In the feature of a language expert, perform a typewriters converts for the provided pity, changing it from {} to {}: ", + "transfer_ori_acc": 0.14386835828767333, + "transfer_atk_acc": 0.1043261414423813 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0, + "origin_prompt": "", + "atk_acc": 100, + "atk_prompt": "", + "transfer_ori_acc": 0.03223192659122938, + "transfer_atk_acc": 0.03223192659122938 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.32, + "origin_prompt": "Translate the provided sentence from {} to {} while maintaining the original meaning and context:", + "atk_acc": 0.25, + "atk_prompt": "Translate the provided sentence from {} to {} while protections the original signify and background:", + "transfer_ori_acc": 0.2678951646057538, + "transfer_atk_acc": 0.23906375713121092 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.31, + "origin_prompt": "In the capacity of a proficient translator, please change the following sentence from {} to {}:", + "atk_acc": 0.26, + "atk_prompt": "Towards the skills of a proficient performers, please evolving the following denounces from {} to {}: ", + "transfer_ori_acc": 0.26592302056033257, + "transfer_atk_acc": 0.23053164042025812 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 14.37, + "origin_prompt": "Determine the solution to this mathematical problem related to {}:", + "atk_acc": 0.62, + "atk_prompt": "Determine the address to this arithmetic issuing links to {}:", + "transfer_ori_acc": 10.625, + "transfer_atk_acc": 2.5 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 15.0, + "origin_prompt": "As a mathematics instructor, calculate the answer to the following problem related to {}:", + "atk_acc": 0.62, + "atk_prompt": "As a algebra schoolteacher, calculate the address to the following question linking to {}: ", + "transfer_ori_acc": 11.875, + "transfer_atk_acc": 2.5 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_google-flan-t5-large/textfooler_3_shot.json b/results_transfer/google-flan-ul2_google-flan-t5-large/textfooler_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..9a3a65d8be1a8da2e6e93d1fbac3883d887bb157 --- /dev/null +++ b/results_transfer/google-flan-ul2_google-flan-t5-large/textfooler_3_shot.json @@ -0,0 +1,242 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 86.4, + "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable'.", + "atk_acc": 39.5, + "atk_prompt": "Inspect the made condemning and select if it is grammatically healthier. Answers with either 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 77.7, + "transfer_atk_acc": 76.3 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 86.6, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.", + "atk_acc": 73.0, + "atk_prompt": "While performing grammar scans, classify the verbs of the afterward condemned as 'acceptable' for correctness grammar or 'unacceptable' for inappropriate vocabulary. ", + "transfer_ori_acc": 75.7, + "transfer_atk_acc": 74.0 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 86.52, + "origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 83.58, + "atk_prompt": "Compare the later judgements and defines if they exchange the same semantic sensation by answered with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 80.3921568627451, + "transfer_atk_acc": 79.90196078431373 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 89.22, + "origin_prompt": "As a sentence similarity evaluator, analyze the provided sentences and indicate if their meanings are 'equivalent' or 'not_equivalent'.", + "atk_acc": 85.29, + "atk_prompt": "As a sentence metaphor evaluator, analyze the brought priors and demonstrating if their meanings are 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 79.16666666666666, + "transfer_atk_acc": 81.86274509803921 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 94.0, + "origin_prompt": "Analyze the context and question, and indicate if the context entails the answer by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 54.2, + "atk_prompt": "Explore the context and question, and suggest if the context involves the rebuttal by selected 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 87.3, + "transfer_atk_acc": 83.5 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 93.9, + "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", + "atk_acc": 61.5, + "atk_prompt": "Because a semantic inquisitors, appraise whether the provided context alleges the rebuttal to the issuing and selector 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 87.3, + "transfer_atk_acc": 85.1 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.7, + "origin_prompt": "Examine the following expressions and tell me if they are alike in meaning by using 'equivalent' or 'not_equivalent'.", + "atk_acc": 85.9, + "atk_prompt": "Examine the afterward words and saying me if they are uniformly in connotation by using 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 86.7, + "transfer_atk_acc": 85.39999999999999 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 90.0, + "origin_prompt": "As a tool for determining question equivalence, review the questions and categorize their similarity as either 'equivalent' or 'not_equivalent'.", + "atk_acc": 87.6, + "atk_prompt": "Because a tool for determining question equivalence, examination the queries and classifications their likeness as either 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 86.3, + "transfer_atk_acc": 86.8 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 93.5, + "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 92.42, + "atk_prompt": "Deciding whether the given pair of sentencing demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 84.83754512635379, + "transfer_atk_acc": 85.1985559566787 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 93.5, + "origin_prompt": "As a tool for determining entailment relationships, review the two statements and categorize their connection as either 'entailment' or 'not_entailment'.", + "atk_acc": 91.34, + "atk_prompt": "Because a device for determining entailment relationships, review the two words and categorize their connection as either 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 83.39350180505414, + "transfer_atk_acc": 81.94945848375451 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.79, + "origin_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative'.", + "atk_acc": 93.46, + "atk_prompt": "Determine the plenary senses of this condemning, categorizing it as 'positive' or 'negative'. ", + "transfer_ori_acc": 94.38073394495413, + "transfer_atk_acc": 94.38073394495413 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 96.56, + "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'.", + "atk_acc": 95.18, + "atk_prompt": "As a emotion classifier, ascertain whether the suite wording is 'positive' or 'negative'. ", + "transfer_ori_acc": 94.4954128440367, + "transfer_atk_acc": 94.61009174311926 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 77.46, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 73.24, + "atk_prompt": "Appraisal the attach between the suite condemns and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 64.7887323943662, + "transfer_atk_acc": 66.19718309859155 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 76.06, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 73.24, + "atk_prompt": "In your role as an entailment analysis tool, assess the reports between the given sentences and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 57.74647887323944, + "transfer_atk_acc": 61.97183098591549 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 54.08, + "origin_prompt": "For the multiple-choice question related to {}, please choose the most accurate answer from 'A', 'B', 'C', or 'D'.", + "atk_acc": 24.82, + "atk_prompt": "Within the multiple-choice item relevant to {}, please decides the most careful rebuttal from 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 45.56737588652482, + "transfer_atk_acc": 42.5531914893617 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 53.72, + "origin_prompt": "As an expert in {}, respond to the following multiple-choice question by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.95, + "atk_prompt": "As an whiz in {}, respond to the following multiple-choice question by decided 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 46.98581560283688, + "transfer_atk_acc": 45.39007092198582 + }, + { + "dataset": "squad_v2", + "type": "task", + "origin_acc": 91.05, + "origin_prompt": "Please derive the most fitting answer from the context. If there isn't an answer in the context, use 'unanswerable'.", + "atk_acc": 55.32, + "atk_prompt": "Ask subtract the most fitting rebuttal from the context. If there isn't an rebuttal in the context, use 'unanswerable'.", + "transfer_ori_acc": 87.04992770387507, + "transfer_atk_acc": 87.31659437054175 + }, + { + "dataset": "squad_v2", + "type": "role", + "origin_acc": 90.55, + "origin_prompt": "Drawing upon your expertise in the context, determine the most suitable answer. If an answer isn't available, state 'unanswerable'.", + "atk_acc": 55.41, + "atk_prompt": "Drawing upon your acquaintances in the context, identify the most suitable rebuttal. If an answer isn't available, state 'unanswerable'.", + "transfer_ori_acc": 87.04992770387507, + "transfer_atk_acc": 85.42265497660233 + }, + { + "dataset": "iwslt", + "type": "task", + "origin_acc": 0.27, + "origin_prompt": "Accurately translate the sentence from {} to {}, ensuring the meaning remains intact.", + "atk_acc": 0.19, + "atk_prompt": "Rightly translate the convicts from {} to {}, insurance the signify sojourn unaffected. ", + "transfer_ori_acc": 0.2195336520895316, + "transfer_atk_acc": 0.2204127497581867 + }, + { + "dataset": "iwslt", + "type": "role", + "origin_acc": 0.27, + "origin_prompt": "Assuming the responsibilities of a professional translator, translate the subsequent text passage from {} to {}.", + "atk_acc": 0.19, + "atk_prompt": "Adopt the indebted of a vocational artist, translate the subsequent laws transition from {} to {}. ", + "transfer_ori_acc": 0.22030199520456395, + "transfer_atk_acc": 0.2233226772101081 + }, + { + "dataset": "un_multi", + "type": "task", + "origin_acc": 0.36, + "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}.", + "atk_acc": 0.35, + "atk_prompt": "Perform a computer translate for the tabled sentence, changing it from {} to {}. ", + "transfer_ori_acc": 0.30870734220870066, + "transfer_atk_acc": 0.3112750115865155 + }, + { + "dataset": "un_multi", + "type": "role", + "origin_acc": 0.36, + "origin_prompt": "As a skilled linguist, please convert the following sentence from {} to {}.", + "atk_acc": 0.35, + "atk_prompt": "As a skilled linguist, please translate the following punishment from {} to {}. ", + "transfer_ori_acc": 0.3046140486156621, + "transfer_atk_acc": 0.30805246751439636 + }, + { + "dataset": "math", + "type": "task", + "origin_acc": 16.25, + "origin_prompt": "Calculate the answer to the following math query about {}.", + "atk_acc": 10.0, + "atk_prompt": "Estimate the rebuttal to the following math view about {}. ", + "transfer_ori_acc": 16.25, + "transfer_atk_acc": 15.625 + }, + { + "dataset": "math", + "type": "role", + "origin_acc": 16.25, + "origin_prompt": "Act as a math expert and compute the result of the following mathematical task about {}.", + "atk_acc": 13.75, + "atk_prompt": "Act as a math expert and estimate the conclusion of the following mathematical task about {}. ", + "transfer_ori_acc": 15.625, + "transfer_atk_acc": 15.625 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_google-flan-t5-large/translation_0_shot.json b/results_transfer/google-flan-ul2_google-flan-t5-large/translation_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..ce1b6b6bf335a9f5582de482d560b52d4354820e --- /dev/null +++ b/results_transfer/google-flan-ul2_google-flan-t5-large/translation_0_shot.json @@ -0,0 +1,122 @@ +[ + { + "dataset": "cola", + "type": "translation", + "origin_acc": 86.2, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':", + "atk_acc": 84.7, + "atk_prompt": "As a tool for assessing semantic relevance, categorize the sentence as 'acceptable' or 'unacceptable' according to its meaning and context.", + "transfer_ori_acc": 78.0, + "transfer_atk_acc": 75.7 + }, + { + "dataset": "mrpc", + "type": "translation", + "origin_acc": 88.73, + "origin_prompt": "As a sentence similarity evaluator, analyze the provided sentences and indicate if their meanings are 'equivalent' or 'not_equivalent'.", + "atk_acc": 81.13, + "atk_prompt": "Whether a given pair of sentences is 'equivalent' or 'not_equivalent' depends on the context.", + "transfer_ori_acc": 80.63725490196079, + "transfer_atk_acc": 83.08823529411765 + }, + { + "dataset": "qnli", + "type": "translation", + "origin_acc": 94.6, + "origin_prompt": "Based on the provided context and question, decide if the information supports the answer by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 78.9, + "atk_prompt": "By analyzing the relations between sentences, determine if a given sentence does not necessarily include other sentences and answer with 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 89.4, + "transfer_atk_acc": 85.6 + }, + { + "dataset": "qqp", + "type": "translation", + "origin_acc": 89.2, + "origin_prompt": "While performing question comparison analysis, classify the similarity of the following questions as 'equivalent' for equivalent questions or 'not_equivalent' for different questions.", + "atk_acc": 87.4, + "atk_prompt": "Classify sentence equivalence as 'equivalent' or 'not_equivalent' by judging whether two sentences have the same meaning to each other.", + "transfer_ori_acc": 86.6, + "transfer_atk_acc": 86.1 + }, + { + "dataset": "rte", + "type": "translation", + "origin_acc": 94.58, + "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 89.89, + "atk_prompt": "Examine the paraphrases of a given sentence and classify them 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 83.03249097472924, + "transfer_atk_acc": 81.58844765342961 + }, + { + "dataset": "sst2", + "type": "translation", + "origin_acc": 96.56, + "origin_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement:", + "atk_acc": 93.46, + "atk_prompt": "Please analyze the emotion in this sentence and classify it as 'positive' or 'negative'.", + "transfer_ori_acc": 95.41284403669725, + "transfer_atk_acc": 93.80733944954129 + }, + { + "dataset": "wnli", + "type": "translation", + "origin_acc": 80.28, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 73.24, + "atk_prompt": "Given a couple of phrases, label their relationship as 'entailment' if one necessarily implies the other, or as 'not_entailment' if there is no such implication.", + "transfer_ori_acc": 61.97183098591549, + "transfer_atk_acc": 64.7887323943662 + }, + { + "dataset": "mmlu", + "type": "translation", + "origin_acc": 53.19, + "origin_prompt": "As an expert in {}, respond to the following multiple-choice question by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 36.52, + "atk_prompt": "Choose the correct answer to the multiple-choice question below. Is '{}' an 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 44.680851063829785, + "transfer_atk_acc": 43.262411347517734 + }, + { + "dataset": "squad_v2", + "type": "translation", + "origin_acc": 90.05, + "origin_prompt": "Please derive the most fitting answer from the context. If there isn't an answer in the context, respond with 'unanswerable'.", + "atk_acc": 84.45, + "atk_prompt": "Please give a solution to what kind of problem in the following sentence. If there is no solution, respond with 'unanswerable'.", + "transfer_ori_acc": 87.64992770387508, + "transfer_atk_acc": 85.51584249084247 + }, + { + "dataset": "iwslt", + "type": "translation", + "origin_acc": 0.18, + "origin_prompt": "Accurately translate the sentence from {} to {}, ensuring the meaning remains intact:", + "atk_acc": 0.16, + "atk_prompt": "Please translate the given sentence into {} to {}.", + "transfer_ori_acc": 0.13434360012327884, + "transfer_atk_acc": 0.1321435432036806 + }, + { + "dataset": "un_multi", + "type": "translation", + "origin_acc": 0.32, + "origin_prompt": "Please perform a translation of the given sentence, converting it from {} to {}:", + "atk_acc": 0.31, + "atk_prompt": "Please translate the given sentence, converting it from {} to {}.", + "transfer_ori_acc": 0.2683567351751297, + "transfer_atk_acc": 0.268967122899458 + }, + { + "dataset": "math", + "type": "translation", + "origin_acc": 15.62, + "origin_prompt": "Act as a math expert and compute the result of the following mathematical task about {}:", + "atk_acc": 14.37, + "atk_prompt": "According to {}, determine the next value.", + "transfer_ori_acc": 3.125, + "transfer_atk_acc": 14.374999999999998 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_google-flan-t5-large/translation_3_shot.json b/results_transfer/google-flan-ul2_google-flan-t5-large/translation_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..b2803fea0e26cc2c0bdd14ab081b9375f07f3fba --- /dev/null +++ b/results_transfer/google-flan-ul2_google-flan-t5-large/translation_3_shot.json @@ -0,0 +1,122 @@ +[ + { + "dataset": "cola", + "type": "translation", + "origin_acc": 86.9, + "origin_prompt": "In the capacity of a grammar assessment system, indicate if the structure of the provided sentence is grammatically correct, responding with 'acceptable' or 'unacceptable'.", + "atk_acc": 84.6, + "atk_prompt": "As a tool for assessing semantic relevance, categorize the sentence as 'acceptable' or 'unacceptable' according to its meaning and context.", + "transfer_ori_acc": 74.6, + "transfer_atk_acc": 76.4 + }, + { + "dataset": "mrpc", + "type": "translation", + "origin_acc": 88.97, + "origin_prompt": "As a sentence similarity evaluator, analyze the provided sentences and indicate if their meanings are 'equivalent' or 'not_equivalent'.", + "atk_acc": 84.07, + "atk_prompt": "Whether a given pair of sentences is 'equivalent' or 'not_equivalent' depends on the context.", + "transfer_ori_acc": 79.16666666666666, + "transfer_atk_acc": 80.63725490196079 + }, + { + "dataset": "qnli", + "type": "translation", + "origin_acc": 94.5, + "origin_prompt": "Based on the provided context and question, decide if the information supports the answer by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 83.0, + "atk_prompt": "By analyzing the relations between sentences, determine if a given sentence does not necessarily include other sentences and answer with 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 89.60000000000001, + "transfer_atk_acc": 86.6 + }, + { + "dataset": "qqp", + "type": "translation", + "origin_acc": 90.0, + "origin_prompt": "As a tool for determining question equivalence, review the questions and categorize their similarity as either 'equivalent' or 'not_equivalent'.", + "atk_acc": 87.7, + "atk_prompt": "Classify sentence equivalence as 'equivalent' or 'not_equivalent' by judging whether two sentences have the same meaning to each other.", + "transfer_ori_acc": 86.3, + "transfer_atk_acc": 85.39999999999999 + }, + { + "dataset": "rte", + "type": "translation", + "origin_acc": 93.86, + "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 90.61, + "atk_prompt": "Examine the semantic match of a given sentence and categorize it as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 83.75451263537906, + "transfer_atk_acc": 80.86642599277978 + }, + { + "dataset": "sst2", + "type": "translation", + "origin_acc": 96.9, + "origin_prompt": "Working as a sentiment analyzer, please indicate if the following text is 'positive' or 'negative'.", + "atk_acc": 94.38, + "atk_prompt": "As an emotion analysis tool, determine whether the emotion in the text is 'positive' or 'negative'.", + "transfer_ori_acc": 95.29816513761467, + "transfer_atk_acc": 93.34862385321101 + }, + { + "dataset": "wnli", + "type": "translation", + "origin_acc": 77.46, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 74.65, + "atk_prompt": "For the purpose of determining implicative relations, analyze the relations of the following sentences and classify them as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 64.7887323943662, + "transfer_atk_acc": 57.74647887323944 + }, + { + "dataset": "mmlu", + "type": "translation", + "origin_acc": 54.08, + "origin_prompt": "For the multiple-choice question related to {}, please choose the most accurate answer from 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.95, + "atk_prompt": "Choose the correct answer to the multiple-choice question below. Is '{}' an 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 45.56737588652482, + "transfer_atk_acc": 45.21276595744681 + }, + { + "dataset": "squad_v2", + "type": "translation", + "origin_acc": 91.05, + "origin_prompt": "Identify the most relevant answer from the context. If it's not possible to find an answer, state 'unanswerable'.", + "atk_acc": 88.73, + "atk_prompt": "Consider the information derived from the given text and provide the most detailed answer. If the answer is not in the text, please answer 'unanswerable'.", + "transfer_ori_acc": 87.01356406751144, + "transfer_atk_acc": 86.91356406751143 + }, + { + "dataset": "iwslt", + "type": "translation", + "origin_acc": 0.27, + "origin_prompt": "Accurately translate the sentence from {} to {}, ensuring the meaning remains intact.", + "atk_acc": 0.22, + "atk_prompt": "Please translate the next sentence from {} to {}.", + "transfer_ori_acc": 0.2195336520895316, + "transfer_atk_acc": 0.21601471623392518 + }, + { + "dataset": "un_multi", + "type": "translation", + "origin_acc": 0.36, + "origin_prompt": "Perform a machine translation for the provided sentence, changing it from {} to {}.", + "atk_acc": 0.35, + "atk_prompt": "Please convert the following sentences to {} and translate to {}.", + "transfer_ori_acc": 0.30870734220870066, + "transfer_atk_acc": 0.30344234228764916 + }, + { + "dataset": "math", + "type": "translation", + "origin_acc": 16.88, + "origin_prompt": "Figure out the solution for the following mathematical task with {}.", + "atk_acc": 10.0, + "atk_prompt": "According to {}, set the following value.", + "transfer_ori_acc": 17.5, + "transfer_atk_acc": 15.625 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_vicuna-13b/bertattack_0_shot.json b/results_transfer/google-flan-ul2_vicuna-13b/bertattack_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..ba9a90d4ffdfe9250131dc177e6cda3566170ce9 --- /dev/null +++ b/results_transfer/google-flan-ul2_vicuna-13b/bertattack_0_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 86.0, + "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable':", + "atk_acc": 25.3, + "atk_prompt": "find the particular sentence and look if it is grammaticaldefinitely present. responded with neither 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 50.0, + "transfer_atk_acc": 25.1 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 86.1, + "origin_prompt": "In your role as a grammar check tool, assess the following sentence and classify it as 'acceptable' if it is grammatically correct or 'unacceptable' if it is incorrect:", + "atk_acc": 30.9, + "atk_prompt": "should your job as a grammar get check, assess the following sentence and classify it as 'acceptable' if it is grammaticalmore improper or 'unacceptable' if it is contradictory:", + "transfer_ori_acc": 43.1, + "transfer_atk_acc": 62.4 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 88.24, + "origin_prompt": "In your capacity as a language analyst, assess the following sentences and classify their similarity as 'equivalent' or 'not_equivalent'.", + "atk_acc": 70.1, + "atk_prompt": "during your capacity as a grammatical analytic, calculate the following words and classify their difference as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 51.225490196078425, + "transfer_atk_acc": 20.588235294117645 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 87.25, + "origin_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent'.", + "atk_acc": 34.8, + "atk_prompt": "can the following sentences be considered semanticmore inferior? this reply with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 53.431372549019606, + "transfer_atk_acc": 45.34313725490196 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 94.1, + "origin_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 52.7, + "atk_prompt": "Based on the info in the context, decide if the object to the question is wrong by choosing 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 34.1, + "transfer_atk_acc": 11.0 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 94.2, + "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", + "atk_acc": 42.6, + "atk_prompt": "while a semantic researcher, evaluate positive the supported context returns the answer to the question and reject 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 13.4, + "transfer_atk_acc": 2.6 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.2, + "origin_prompt": "Please indicate if the following pair of sentences share the same meaning by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 70.8, + "atk_prompt": "Please indicate if the following individual of verbs share the same meaning by auxiliary with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 31.8, + "transfer_atk_acc": 8.9 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 88.5, + "origin_prompt": "As a tool for determining question equivalence, review the questions and categorize their similarity as either 'equivalent' or 'not_equivalent'.", + "atk_acc": 70.9, + "atk_prompt": "As a tool for weighing question equivalence, review the data and codeegoassociate their difference as neither 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 8.200000000000001, + "transfer_atk_acc": 4.2 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 94.22, + "origin_prompt": "Examine the pair of sentences and determine if they exhibit entailment or not_entailment. Answer with either 'entailment' or 'not_entailment'.", + "atk_acc": 88.45, + "atk_prompt": "examine the parent of sentences and explain if they exhibit entailment or not_entailment. consider with neither 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 27.79783393501805, + "transfer_atk_acc": 25.27075812274368 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 93.86, + "origin_prompt": "Acting as an entailment detection instrument, determine if the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment':", + "atk_acc": 83.03, + "atk_prompt": "Acting as an entailment on instrument, determine if the hit pair of verb appears entailment or not_entailment. answers with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 42.96028880866426, + "transfer_atk_acc": 49.81949458483754 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.44, + "origin_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative':", + "atk_acc": 0.11, + "atk_prompt": "Analyze the topic of this phrase and responding with neither 'positive' or 'negative': ", + "transfer_ori_acc": 47.935779816513765, + "transfer_atk_acc": 4.81651376146789 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 96.33, + "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'. Please classify:", + "atk_acc": 43.69, + "atk_prompt": "like a sentiment classifier, identify neither the following text is 'positive' or 'negative'. from submit: ", + "transfer_ori_acc": 33.94495412844037, + "transfer_atk_acc": 8.944954128440367 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 78.87, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 71.83, + "atk_prompt": "Are the main two sentences entailment or not_entailment? begin me with \"entailment\" or \"not_entailment\", just the word. ", + "transfer_ori_acc": 25.352112676056336, + "transfer_atk_acc": 40.845070422535215 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 78.87, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment':", + "atk_acc": 70.42, + "atk_prompt": "In your role as an entailment analysis tool, report the commitment between the and are and classify it as 'entailment' or 'not_entailment':", + "transfer_ori_acc": 43.66197183098591, + "transfer_atk_acc": 23.943661971830984 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 52.84, + "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.", + "atk_acc": 17.38, + "atk_prompt": "not respond to the multiple-response question about {} by choosing the responses correctly: 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 19.148936170212767, + "transfer_atk_acc": 30.851063829787233 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 53.19, + "origin_prompt": "As a knowledgeable individual in {}, provide your response to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 37.94, + "atk_prompt": "not a knowledgeable example in {}, state your response to the multiple-is question by choosing 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 22.69503546099291, + "transfer_atk_acc": 18.439716312056735 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_vicuna-13b/bertattack_3_shot.json b/results_transfer/google-flan-ul2_vicuna-13b/bertattack_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..466bd7d5af5e33bec7b32d248b325c7c77ddc33e --- /dev/null +++ b/results_transfer/google-flan-ul2_vicuna-13b/bertattack_3_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 86.4, + "origin_prompt": "Check the grammar of the following sentence and indicate if it is 'Acceptable' or 'Unacceptable'.", + "atk_acc": 78.4, + "atk_prompt": "label the person of the following sentence and check if it is 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 39.1, + "transfer_atk_acc": 29.799999999999997 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 86.6, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.", + "atk_acc": 78.2, + "atk_prompt": "be doing grammar analysis, classify the rule of the following example as 'acceptable' for wrong spelling or 'unacceptable' for bad grammar. ", + "transfer_ori_acc": 49.9, + "transfer_atk_acc": 38.2 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 86.76, + "origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 39.71, + "atk_prompt": "like the following sentences and determine if they lack the same plural meaning by responding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 62.254901960784316, + "transfer_atk_acc": 35.78431372549019 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 88.97, + "origin_prompt": "As a sentence similarity evaluator, analyze the provided sentences and indicate if their meanings are 'equivalent' or 'not_equivalent'.", + "atk_acc": 81.37, + "atk_prompt": "As a sentence compare evaluators, view the derived variables and indicate if their meanings are 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 68.13725490196079, + "transfer_atk_acc": 70.34313725490196 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 94.0, + "origin_prompt": "Analyze the context and question, and indicate if the context entails the answer by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 88.8, + "atk_prompt": "Analyze the context and question, and indicate if the dictionary ennotes the context by avoiding 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 35.0, + "transfer_atk_acc": 7.9 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 93.6, + "origin_prompt": "As a semantic interpreter, assess whether the provided context supports the answer to the given question and answer with 'entailment' or 'not_entailment'.", + "atk_acc": 83.5, + "atk_prompt": "with a semantic presentation, evaluation why the provided contexts supports the answer to the given answer and questions with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 33.5, + "transfer_atk_acc": 5.2 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.7, + "origin_prompt": "Examine the following expressions and tell me if they are alike in meaning by using 'equivalent' or 'not_equivalent'.", + "atk_acc": 82.9, + "atk_prompt": "Examine the past expressions and give me if they are common in meaning by putting 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 44.9, + "transfer_atk_acc": 48.1 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 90.1, + "origin_prompt": "As a tool for determining question equivalence, review the questions and categorize their similarity as either 'equivalent' or 'not_equivalent'.", + "atk_acc": 74.3, + "atk_prompt": "according a tool for identifying concept equivalence, do the questions and tcsconsider their compared as neither 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 31.7, + "transfer_atk_acc": 30.8 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 93.5, + "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 91.34, + "atk_prompt": "a as the given pair of verb demonstrates entailment or not_entailment. question with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 7.2202166064981945, + "transfer_atk_acc": 41.51624548736462 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 93.86, + "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 65.34, + "atk_prompt": "acting as an entailment classindicator, identify neither the given pair of sentences displays entailment or not_entailment. reply with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 6.4981949458483745, + "transfer_atk_acc": 41.15523465703971 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.79, + "origin_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative'.", + "atk_acc": 92.2, + "atk_prompt": "locate the associate emotion of this sentence, categorizing it as 'positive' or 'negative'. ", + "transfer_ori_acc": 0.573394495412844, + "transfer_atk_acc": 9.977064220183486 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 96.56, + "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'.", + "atk_acc": 95.3, + "atk_prompt": "in a sentiment classifier, determine where the over words is 'positive' or 'negative'. ", + "transfer_ori_acc": 15.36697247706422, + "transfer_atk_acc": 34.288990825688074 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 77.46, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 74.65, + "atk_prompt": "a the links between the following sentences and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 19.718309859154928, + "transfer_atk_acc": 26.76056338028169 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 77.46, + "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 66.2, + "atk_prompt": "Working as an entailment classauxiliary, identify but the verbal a of nouns displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 0.0, + "transfer_atk_acc": 18.30985915492958 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 53.9, + "origin_prompt": "Below are multiple-choice question concerning {}. Indicate your response with 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.24, + "atk_prompt": "Below are multiple-guessing guess concerning {}. begin your response with 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 6.382978723404255, + "transfer_atk_acc": 6.914893617021277 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 54.08, + "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.77, + "atk_prompt": "considering your familiarity with {}, order to the following multiple-picked question by starting 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 8.156028368794328, + "transfer_atk_acc": 11.702127659574469 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_vicuna-13b/checklist_0_shot.json b/results_transfer/google-flan-ul2_vicuna-13b/checklist_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..a0f054d69a2af2abc36ac1623df6ff49fe1c3038 --- /dev/null +++ b/results_transfer/google-flan-ul2_vicuna-13b/checklist_0_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 85.9, + "origin_prompt": "Determine if the grammar of the given sentence is 'Acceptable' or 'Unacceptable':", + "atk_acc": 66.1, + "atk_prompt": "Determine if the grammar of the given sentence is 'Acceptable' or 'Unacceptable' EAB4KP2NVY:", + "transfer_ori_acc": 36.4, + "transfer_atk_acc": 50.8 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 86.1, + "origin_prompt": "In your role as a grammar check tool, assess the following sentence and classify it as 'acceptable' if it is grammatically correct or 'unacceptable' if it is incorrect:", + "atk_acc": 55.0, + "atk_prompt": "In your role as a grammar check tool, assess the following sentence and classify it as 'acceptable' if it is grammatically correct or 'unacceptable' if it is incorrect bhrRSokrfa:", + "transfer_ori_acc": 43.1, + "transfer_atk_acc": 10.6 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 86.52, + "origin_prompt": "Please analyze the provided sentences and indicate if their meanings are the same by choosing 'equivalent' or 'not_equivalent'.", + "atk_acc": 85.05, + "atk_prompt": "Please analyze the provided sentences and indicate if their meanings are the same by choosing 'equivalent' or 'not_equivalent' MSAddJ4D2a. ", + "transfer_ori_acc": 54.166666666666664, + "transfer_atk_acc": 14.215686274509803 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 87.01, + "origin_prompt": "As a textual similarity evaluator, analyze the following pair of sentences and determine if they are semantically 'equivalent' or 'not_equivalent'.", + "atk_acc": 87.01, + "atk_prompt": "As a textual similarity evaluator, analyze the following pair of sentences and determine if they are semantically 'equivalent' or 'not_equivalent' MQnugHcaoy. ", + "transfer_ori_acc": 20.34313725490196, + "transfer_atk_acc": 22.30392156862745 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 94.0, + "origin_prompt": "Given the question and context provided, determine if the answer can be inferred by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 93.9, + "atk_prompt": "Given the question and context provided, determine if the answer can be inferred by choosing 'entailment' or 'not_entailment' CuVpbbkC4c. ", + "transfer_ori_acc": 17.1, + "transfer_atk_acc": 8.6 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 94.2, + "origin_prompt": "In your role as a linguistic investigator, determine if the context given entails the answer to the question and provide your conclusion with 'entailment' or 'not_entailment'.", + "atk_acc": 93.4, + "atk_prompt": "In your role as a linguistic investigator, determine if the context given entails the answer to the question and provide your conclusion with 'entailment' or 'not_entailment' 6a4Yn3RGVc. ", + "transfer_ori_acc": 31.5, + "transfer_atk_acc": 36.7 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.3, + "origin_prompt": "Do these two sentences convey the same meaning? Indicate with 'equivalent' or 'not_equivalent'.", + "atk_acc": 87.3, + "atk_prompt": "Do these two sentences convey the same meaning? Indicate with 'equivalent' or 'not_equivalent' MSAddJ4D2a. ", + "transfer_ori_acc": 28.7, + "transfer_atk_acc": 26.8 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 88.3, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 88.4, + "atk_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions SFPCqpiClT. ", + "transfer_ori_acc": 26.200000000000003, + "transfer_atk_acc": 19.400000000000002 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 94.22, + "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 93.14, + "atk_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment' EAB4KP2NVY.", + "transfer_ori_acc": 27.79783393501805, + "transfer_atk_acc": 20.938628158844764 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 93.86, + "origin_prompt": "Acting as an entailment detection instrument, determine if the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment':", + "atk_acc": 93.5, + "atk_prompt": "Acting as an entailment detection instrument, determine if the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment' ofw9fEkN5R:", + "transfer_ori_acc": 42.96028880866426, + "transfer_atk_acc": 38.628158844765345 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.56, + "origin_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative':", + "atk_acc": 95.76, + "atk_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative' Cev0s49fbe: ", + "transfer_ori_acc": 47.935779816513765, + "transfer_atk_acc": 48.96788990825688 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 96.33, + "origin_prompt": "Working as a sentiment analyzer, please indicate if the following text is 'positive' or 'negative'. Classify:", + "atk_acc": 95.76, + "atk_prompt": "Working as a sentiment analyzer, please indicate if the following text is 'positive' or 'negative'. Classify 9vGXgnbWB8: ", + "transfer_ori_acc": 41.97247706422018, + "transfer_atk_acc": 23.279816513761467 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 80.28, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 74.65, + "atk_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment' EAB4KP2NVY.", + "transfer_ori_acc": 38.028169014084504, + "transfer_atk_acc": 29.577464788732392 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 80.28, + "origin_prompt": "Functioning as an entailment evaluation tool, analyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment':", + "atk_acc": 74.65, + "atk_prompt": "Functioning as an entailment evaluation tool, analyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment' 5yWbBXztUY:", + "transfer_ori_acc": 43.66197183098591, + "transfer_atk_acc": 9.859154929577464 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 52.84, + "origin_prompt": "For the multiple-choice question related to {}, please choose the most accurate answer from 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.95, + "atk_prompt": "For the multiple-choice question related to {}, please choose the most accurate answer from 'A', 'B', 'C', or 'D' XStsD2pUzu. ", + "transfer_ori_acc": 18.79432624113475, + "transfer_atk_acc": 21.808510638297875 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 53.19, + "origin_prompt": "As an expert in {}, respond to the following multiple-choice question by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.77, + "atk_prompt": "As an expert in {}, respond to the following multiple-choice question by selecting 'A', 'B', 'C', or 'D' IhuBIhoPGc.", + "transfer_ori_acc": 23.04964539007092, + "transfer_atk_acc": 20.56737588652482 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_vicuna-13b/checklist_3_shot.json b/results_transfer/google-flan-ul2_vicuna-13b/checklist_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..2c4efde373472e65e53c06ea3307b1cdfb472ff5 --- /dev/null +++ b/results_transfer/google-flan-ul2_vicuna-13b/checklist_3_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 86.4, + "origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable'.", + "atk_acc": 85.6, + "atk_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable' Q47Jmd4lMV. ", + "transfer_ori_acc": 26.0, + "transfer_atk_acc": 10.6 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 86.6, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.", + "atk_acc": 86.2, + "atk_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar EAB4KP2NVY. ", + "transfer_ori_acc": 49.9, + "transfer_atk_acc": 61.3 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 86.27, + "origin_prompt": "Do these two sentences have the same underlying meaning? Respond with 'equivalent' or 'not_equivalent'.", + "atk_acc": 85.78, + "atk_prompt": "Do these two sentences have the same underlying meaning? Respond with 'equivalent' or 'not_equivalent' 6a4Yn3RGVc. ", + "transfer_ori_acc": 60.5392156862745, + "transfer_atk_acc": 66.66666666666666 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 87.5, + "origin_prompt": "As a textual similarity evaluator, analyze the following pair of sentences and determine if they are semantically 'equivalent' or 'not_equivalent'.", + "atk_acc": 87.25, + "atk_prompt": "As a textual similarity evaluator, analyze the following pair of sentences and determine if they are semantically 'equivalent' or 'not_equivalent' ofw9fEkN5R. ", + "transfer_ori_acc": 59.31372549019608, + "transfer_atk_acc": 65.68627450980392 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 94.0, + "origin_prompt": "Analyze the context and question, and indicate if the context entails the answer by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 93.4, + "atk_prompt": "Analyze the context and question, and indicate if the context entails the answer by choosing 'entailment' or 'not_entailment' lf8wBa2yEm. ", + "transfer_ori_acc": 35.0, + "transfer_atk_acc": 16.1 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 93.7, + "origin_prompt": "In your role as a semantic evaluator, determine if the provided context justifies the answer to the question and answer with 'entailment' or 'not_entailment'.", + "atk_acc": 93.5, + "atk_prompt": "In your role as a semantic evaluator, determine if the provided context justifies the answer to the question and answer with 'entailment' or 'not_entailment' zq0DcZ5dnI. ", + "transfer_ori_acc": 24.6, + "transfer_atk_acc": 1.2 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.5, + "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", + "atk_acc": 88.4, + "atk_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent Cev0s49fbe\". ", + "transfer_ori_acc": 34.8, + "transfer_atk_acc": 32.1 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 89.1, + "origin_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 88.7, + "atk_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent' KjPJJ2a7RB. ", + "transfer_ori_acc": 32.9, + "transfer_atk_acc": 28.599999999999998 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 93.5, + "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 92.78, + "atk_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment' d6ZQ3u0GBQ. ", + "transfer_ori_acc": 7.2202166064981945, + "transfer_atk_acc": 5.054151624548736 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 93.5, + "origin_prompt": "Acting as an entailment detection instrument, determine if the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 93.14, + "atk_prompt": "Acting as an entailment detection instrument, determine if the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment' BWmsLx8xOA. ", + "transfer_ori_acc": 4.332129963898916, + "transfer_atk_acc": 46.20938628158845 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.79, + "origin_prompt": "Evaluate the sentiment of the given text and classify it as 'positive' or 'negative'.", + "atk_acc": 96.56, + "atk_prompt": "Evaluate the sentiment of the given text and classify it as 'positive' or 'negative' Cev0s49fbe. ", + "transfer_ori_acc": 10.435779816513762, + "transfer_atk_acc": 9.63302752293578 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 96.56, + "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'.", + "atk_acc": 96.44, + "atk_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative' 6a4Yn3RGVc. ", + "transfer_ori_acc": 15.36697247706422, + "transfer_atk_acc": 14.449541284403669 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 77.46, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 76.06, + "atk_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment' 5yWbBXztUY. ", + "transfer_ori_acc": 19.718309859154928, + "transfer_atk_acc": 18.30985915492958 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 76.06, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 76.06, + "atk_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment' 5XeflW1ZJc. ", + "transfer_ori_acc": 23.943661971830984, + "transfer_atk_acc": 12.676056338028168 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 53.9, + "origin_prompt": "Below are multiple-choice question concerning {}. Indicate your response with 'A', 'B', 'C', or 'D'.", + "atk_acc": 52.84, + "atk_prompt": "Below are multiple-choice question concerning {}. Indicate your response with 'A', 'B', 'C', or 'D' RcRneWo6Iv. ", + "transfer_ori_acc": 6.382978723404255, + "transfer_atk_acc": 6.914893617021277 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 54.08, + "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 53.01, + "atk_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D' Q47Jmd4lMV.", + "transfer_ori_acc": 8.156028368794328, + "transfer_atk_acc": 6.914893617021277 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_vicuna-13b/deepwordbug_0_shot.json b/results_transfer/google-flan-ul2_vicuna-13b/deepwordbug_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..1b10c4b4378fb6beb326255e944eb49287e3e492 --- /dev/null +++ b/results_transfer/google-flan-ul2_vicuna-13b/deepwordbug_0_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 85.9, + "origin_prompt": "Determine if the grammar of the given sentence is 'Acceptable' or 'Unacceptable':", + "atk_acc": 78.1, + "atk_prompt": "Determinqe if the grmamar of the igven sentewnce is 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 36.4, + "transfer_atk_acc": 44.7 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 86.1, + "origin_prompt": "In your role as a grammar check tool, assess the following sentence and classify it as 'acceptable' if it is grammatically correct or 'unacceptable' if it is incorrect:", + "atk_acc": 84.5, + "atk_prompt": "In your role as a grammar check too, assess the following senttence and classify it as 'acceptable' if it is rgammatically correct or 'unacceptable' if it is incorroct:", + "transfer_ori_acc": 43.1, + "transfer_atk_acc": 3.8 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 87.25, + "origin_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent'.", + "atk_acc": 83.58, + "atk_prompt": "Can the given sentences be consiedred semantically idePntical? Plaese reply with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 53.431372549019606, + "transfer_atk_acc": 57.59803921568627 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 88.73, + "origin_prompt": "As a sentence similarity evaluator, analyze the provided sentences and indicate if their meanings are 'equivalent' or 'not_equivalent'.", + "atk_acc": 81.13, + "atk_prompt": "A a sentWnce sidmilarity evaluator, analyze the provAded sentences and idnicate if their meanVngs are 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 25.98039215686275, + "transfer_atk_acc": 31.372549019607842 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 94.0, + "origin_prompt": "Given the question and context provided, determine if the answer can be inferred by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 93.8, + "atk_prompt": "Given the question and context provided, deermine if the answer can be inferred by choosing 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 17.1, + "transfer_atk_acc": 13.700000000000001 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 94.2, + "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", + "atk_acc": 93.3, + "atk_prompt": "us a sAmantic researJcher, evZluate hwether the provided context suports the answer to the question and choose 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 13.4, + "transfer_atk_acc": 10.6 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.2, + "origin_prompt": "Please indicate if the following pair of sentences share the same meaning by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 87.4, + "atk_prompt": "qPlease indicate if the folloing pair of senFences share the same meaning by resopnding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 31.8, + "transfer_atk_acc": 18.099999999999998 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 88.5, + "origin_prompt": "As a tool for determining question equivalence, review the questions and categorize their similarity as either 'equivalent' or 'not_equivalent'.", + "atk_acc": 87.3, + "atk_prompt": "As a tool for determining question equivalence, review the questions and ategorize their similvarity as eitehr 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 8.200000000000001, + "transfer_atk_acc": 4.2 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 94.22, + "origin_prompt": "Examine the pair of sentences and determine if they exhibit entailment or not_entailment. Answer with either 'entailment' or 'not_entailment'.", + "atk_acc": 92.06, + "atk_prompt": "Examine the pir of sentencs and determine if they exxibit entailment or not_entailment. Answer with eitfer 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 27.79783393501805, + "transfer_atk_acc": 21.660649819494584 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 93.86, + "origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 92.78, + "atk_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respodn with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 24.90974729241877, + "transfer_atk_acc": 32.851985559566785 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.56, + "origin_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative':", + "atk_acc": 95.64, + "atk_prompt": "AnalyVe the tone of this statelent and resBond with etiher 'positive' or 'negative': ", + "transfer_ori_acc": 47.935779816513765, + "transfer_atk_acc": 8.486238532110093 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 96.33, + "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'. Please classify:", + "atk_acc": 95.76, + "atk_prompt": "s a sentiment classifier, determine whether the following text is 'positive' or 'negative'. Please claSsify: ", + "transfer_ori_acc": 33.94495412844037, + "transfer_atk_acc": 25.688073394495415 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 80.28, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 76.06, + "atk_prompt": "Nssess the connection between the fnollowing sentences and classify it as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 38.028169014084504, + "transfer_atk_acc": 29.577464788732392 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 80.28, + "origin_prompt": "Functioning as an entailment evaluation tool, analyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment':", + "atk_acc": 76.06, + "atk_prompt": "Functioning as an entailment ealuation tCol, aSalyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment':", + "transfer_ori_acc": 43.66197183098591, + "transfer_atk_acc": 40.845070422535215 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 52.84, + "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.95, + "atk_prompt": "Pqlease respond to the multiple-choice question about {} by Aelecting the appropriate answer: 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 19.148936170212767, + "transfer_atk_acc": 17.907801418439718 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 53.19, + "origin_prompt": "As an expert in {}, respond to the following multiple-choice question by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.77, + "atk_prompt": "As an exert in {}, respond to the following multiple-choice question by selecting 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 23.04964539007092, + "transfer_atk_acc": 24.645390070921984 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_vicuna-13b/deepwordbug_3_shot.json b/results_transfer/google-flan-ul2_vicuna-13b/deepwordbug_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..28d4680c17bd1e32acc4aa3946dbfd3071de271f --- /dev/null +++ b/results_transfer/google-flan-ul2_vicuna-13b/deepwordbug_3_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 86.4, + "origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable'.", + "atk_acc": 83.6, + "atk_prompt": "Reviee the sentence below and identfy wehther its grmamar is 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 26.0, + "transfer_atk_acc": 22.400000000000002 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 86.9, + "origin_prompt": "In the capacity of a grammar assessment system, indicate if the structure of the provided sentence is grammatically correct, responding with 'acceptable' or 'unacceptable'.", + "atk_acc": 85.2, + "atk_prompt": "Io the capcity of a grammar assessment system, indicate if the strcture of the povided senrence is grammatiaclly corrct, responding with 'acceptable' or 'unacceptable'. ", + "transfer_ori_acc": 30.5, + "transfer_atk_acc": 15.299999999999999 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 87.01, + "origin_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent'.", + "atk_acc": 84.31, + "atk_prompt": "Can the given sentences be considered semantically idrntical? Please reply with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 65.44117647058823, + "transfer_atk_acc": 65.93137254901961 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 87.5, + "origin_prompt": "As a textual similarity evaluator, analyze the following pair of sentences and determine if they are semantically 'equivalent' or 'not_equivalent'.", + "atk_acc": 84.56, + "atk_prompt": "As a extual simialrity evaluator, analyze the following pair of sentences and determine if they are semantically 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 59.31372549019608, + "transfer_atk_acc": 61.76470588235294 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 94.0, + "origin_prompt": "Review the given context and question, and decide if the context contains enough information to support the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 93.4, + "atk_prompt": "Review the given cotext and question, and Pdecide if the context contains enough information to sGupport the answe by sefecting 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 35.6, + "transfer_atk_acc": 40.1 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 93.7, + "origin_prompt": "As a semantic interpreter, assess whether the provided context supports the answer to the given question and answer with 'entailment' or 'not_entailment'.", + "atk_acc": 92.9, + "atk_prompt": "As a semantic interpreter, assless wether the provided context supports the answer to the given question and answer with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 33.5, + "transfer_atk_acc": 16.7 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.7, + "origin_prompt": "Examine the following expressions and tell me if they are alike in meaning by using 'equivalent' or 'not_equivalent'.", + "atk_acc": 87.4, + "atk_prompt": "Examine the following expressiZns and tell me if they are aike in meaning by wusing 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 44.9, + "transfer_atk_acc": 34.0 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 89.3, + "origin_prompt": "While performing question comparison analysis, classify the similarity of the following questions as 'equivalent' for equivalent questions or 'not_equivalent' for different questions.", + "atk_acc": 87.6, + "atk_prompt": "While performing Pquestion comparison analysig, classify the simioarity of the following queFtions as 'equivalent' for equivalent questions or 'not_equivalent' for different questions. ", + "transfer_ori_acc": 38.3, + "transfer_atk_acc": 33.4 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 93.5, + "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 92.42, + "atk_prompt": "Determine if the given paGr of sentenecs displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 9.386281588447654, + "transfer_atk_acc": 34.29602888086642 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 93.5, + "origin_prompt": "As a tool for determining entailment relationships, review the two statements and categorize their connection as either 'entailment' or 'not_entailment'.", + "atk_acc": 92.42, + "atk_prompt": "AL a tool for determining entailment relationhsips, review the two staJements and categorize their connection as either 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 28.158844765342963, + "transfer_atk_acc": 32.49097472924188 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.79, + "origin_prompt": "Evaluate the sentiment of the given text and classify it as 'positive' or 'negative'.", + "atk_acc": 96.33, + "atk_prompt": "Evaluate the sentiment of the given tXxt and classify it as 'positive' or 'negative'. ", + "transfer_ori_acc": 10.435779816513762, + "transfer_atk_acc": 36.46788990825688 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 96.56, + "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'.", + "atk_acc": 96.1, + "atk_prompt": "As a sentiment classifier, determine whether the followJing text is 'positive' or 'negative'. ", + "transfer_ori_acc": 15.36697247706422, + "transfer_atk_acc": 29.81651376146789 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 77.46, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 74.65, + "atk_prompt": "Assuss the connection between the following sensences and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 19.718309859154928, + "transfer_atk_acc": 29.577464788732392 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 77.46, + "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 76.06, + "atk_prompt": "Working as an entailment classifeir, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 0.0, + "transfer_atk_acc": 2.8169014084507045 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 53.9, + "origin_prompt": "Below are multiple-choice question concerning {}. Indicate your response with 'A', 'B', 'C', or 'D'.", + "atk_acc": 52.3, + "atk_prompt": "Belvw are multiple-chioce question conecrning {}. IndicaZe your response with 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 6.382978723404255, + "transfer_atk_acc": 7.092198581560284 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 54.08, + "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 52.66, + "atk_prompt": "Considering your familiarity with {}, atKtend to the follownig multipleNchoice juestion by picking 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 8.156028368794328, + "transfer_atk_acc": 9.75177304964539 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_vicuna-13b/stresstest_0_shot.json b/results_transfer/google-flan-ul2_vicuna-13b/stresstest_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..d9814a94bb4eb0434ad9cf8bd6e75f8465f7a7c7 --- /dev/null +++ b/results_transfer/google-flan-ul2_vicuna-13b/stresstest_0_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 86.1, + "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable':", + "atk_acc": 76.3, + "atk_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable' and false is not true :", + "transfer_ori_acc": 50.0, + "transfer_atk_acc": 57.699999999999996 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 86.1, + "origin_prompt": "In your role as a grammar check tool, assess the following sentence and classify it as 'acceptable' if it is grammatically correct or 'unacceptable' if it is incorrect:", + "atk_acc": 34.5, + "atk_prompt": "In your role as a grammar check tool, assess the following sentence and classify it as 'acceptable' if it is grammatically correct or 'unacceptable' if it is incorrect and true is true and true is true and true is true and true is true and true is true :", + "transfer_ori_acc": 43.1, + "transfer_atk_acc": 17.9 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 87.5, + "origin_prompt": "Are the meanings of the following pair of sentences the same? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 85.05, + "atk_prompt": "Are the meanings of the following pair of sentences the same? Answer with 'equivalent' or 'not_equivalent' and false is not true . ", + "transfer_ori_acc": 67.15686274509804, + "transfer_atk_acc": 68.13725490196079 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 87.01, + "origin_prompt": "As a textual similarity evaluator, analyze the following pair of sentences and determine if they are semantically 'equivalent' or 'not_equivalent'.", + "atk_acc": 85.78, + "atk_prompt": "As a textual similarity evaluator, analyze the following pair of sentences and determine if they are semantically 'equivalent' or 'not_equivalent' and false is not true . ", + "transfer_ori_acc": 20.34313725490196, + "transfer_atk_acc": 26.715686274509803 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 94.1, + "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 93.4, + "atk_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment' and true is true . ", + "transfer_ori_acc": 26.5, + "transfer_atk_acc": 37.0 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 94.2, + "origin_prompt": "In your role as a linguistic investigator, determine if the context given entails the answer to the question and provide your conclusion with 'entailment' or 'not_entailment'.", + "atk_acc": 93.8, + "atk_prompt": "In your role as a linguistic investigator, determine if the context given entails the answer to the question and provide your conclusion with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 31.5, + "transfer_atk_acc": 30.4 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.3, + "origin_prompt": "Do these two sentences convey the same meaning? Indicate with 'equivalent' or 'not_equivalent'.", + "atk_acc": 86.7, + "atk_prompt": "Do these two sentences convey the same meaning? Indicate with 'equivalent' or 'not_equivalent' and false is not true . ", + "transfer_ori_acc": 28.7, + "transfer_atk_acc": 31.0 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 88.3, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 88.0, + "atk_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions and true is true . ", + "transfer_ori_acc": 26.200000000000003, + "transfer_atk_acc": 27.0 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 94.22, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 91.7, + "atk_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 27.075812274368232, + "transfer_atk_acc": 7.581227436823104 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 93.86, + "origin_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 92.06, + "atk_prompt": "As an instrument for entailment evaluation, consider the two sentences and determine if their relationship is 'entailment' or 'not_entailment'. Respond with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true :", + "transfer_ori_acc": 24.90974729241877, + "transfer_atk_acc": 37.18411552346571 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.33, + "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment:", + "atk_acc": 90.94, + "atk_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment and false is not true : ", + "transfer_ori_acc": 42.54587155963303, + "transfer_atk_acc": 48.05045871559633 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 96.56, + "origin_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement:", + "atk_acc": 73.85, + "atk_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement and false is not true : ", + "transfer_ori_acc": 32.798165137614674, + "transfer_atk_acc": 49.88532110091743 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 80.28, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 76.06, + "atk_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment' and false is not true .", + "transfer_ori_acc": 38.028169014084504, + "transfer_atk_acc": 22.535211267605636 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 80.28, + "origin_prompt": "Functioning as an entailment evaluation tool, analyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment':", + "atk_acc": 74.65, + "atk_prompt": "Functioning as an entailment evaluation tool, analyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment' and false is not true :", + "transfer_ori_acc": 43.66197183098591, + "transfer_atk_acc": 43.66197183098591 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 52.84, + "origin_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.6, + "atk_prompt": "Please respond to the multiple-choice question about {} by selecting the appropriate answer: 'A', 'B', 'C', or 'D' and false is not true . ", + "transfer_ori_acc": 19.148936170212767, + "transfer_atk_acc": 17.907801418439718 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 53.19, + "origin_prompt": "As a knowledgeable individual in {}, provide your response to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 52.48, + "atk_prompt": "As a knowledgeable individual in {}, provide your response to the multiple-choice question by choosing 'A', 'B', 'C', or 'D' and true is true and true is true and true is true and true is true and true is true .", + "transfer_ori_acc": 22.69503546099291, + "transfer_atk_acc": 16.48936170212766 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_vicuna-13b/stresstest_3_shot.json b/results_transfer/google-flan-ul2_vicuna-13b/stresstest_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..685ece69446d9378b52e452512a30b442b614ea0 --- /dev/null +++ b/results_transfer/google-flan-ul2_vicuna-13b/stresstest_3_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 86.4, + "origin_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable'.", + "atk_acc": 85.4, + "atk_prompt": "Review the sentence below and identify whether its grammar is 'Acceptable' or 'Unacceptable' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 26.0, + "transfer_atk_acc": 6.6000000000000005 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 86.6, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.", + "atk_acc": 86.1, + "atk_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 49.9, + "transfer_atk_acc": 30.9 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 86.76, + "origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 85.78, + "atk_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent' and false is not true . ", + "transfer_ori_acc": 62.254901960784316, + "transfer_atk_acc": 66.42156862745098 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 87.5, + "origin_prompt": "As a textual similarity evaluator, analyze the following pair of sentences and determine if they are semantically 'equivalent' or 'not_equivalent'.", + "atk_acc": 87.01, + "atk_prompt": "As a textual similarity evaluator, analyze the following pair of sentences and determine if they are semantically 'equivalent' or 'not_equivalent' and false is not true . ", + "transfer_ori_acc": 59.31372549019608, + "transfer_atk_acc": 63.725490196078425 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 94.0, + "origin_prompt": "Analyze the context and question, and indicate if the context entails the answer by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 93.9, + "atk_prompt": "Analyze the context and question, and indicate if the context entails the answer by choosing 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 35.0, + "transfer_atk_acc": 12.1 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 93.9, + "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", + "atk_acc": 94.0, + "atk_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment' and true is true . ", + "transfer_ori_acc": 41.6, + "transfer_atk_acc": 40.9 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.5, + "origin_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent\".", + "atk_acc": 88.0, + "atk_prompt": "Are the following two questions equivalent or not? Answer me with \"equivalent\" or \"not_equivalent and true is true \". ", + "transfer_ori_acc": 34.8, + "transfer_atk_acc": 47.8 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 89.1, + "origin_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 88.9, + "atk_prompt": "Functioning as a question similarity evaluation tool, analyze the given questions and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent' and false is not true . ", + "transfer_ori_acc": 32.9, + "transfer_atk_acc": 41.4 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 93.5, + "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 92.78, + "atk_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 46.20938628158845, + "transfer_atk_acc": 35.37906137184115 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 93.5, + "origin_prompt": "As a tool for determining entailment relationships, review the two statements and categorize their connection as either 'entailment' or 'not_entailment'.", + "atk_acc": 92.06, + "atk_prompt": "As a tool for determining entailment relationships, review the two statements and categorize their connection as either 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 28.158844765342963, + "transfer_atk_acc": 5.776173285198556 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.79, + "origin_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative'.", + "atk_acc": 96.67, + "atk_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative' and false is not true . ", + "transfer_ori_acc": 0.573394495412844, + "transfer_atk_acc": 0.11467889908256881 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 96.56, + "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'.", + "atk_acc": 96.33, + "atk_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative' and false is not true . ", + "transfer_ori_acc": 15.36697247706422, + "transfer_atk_acc": 8.486238532110093 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 77.46, + "origin_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment'.", + "atk_acc": 74.65, + "atk_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment' and false is not true . ", + "transfer_ori_acc": 21.12676056338028, + "transfer_atk_acc": 9.859154929577464 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 76.06, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 74.65, + "atk_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment' and false is not true . ", + "transfer_ori_acc": 23.943661971830984, + "transfer_atk_acc": 32.3943661971831 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 53.9, + "origin_prompt": "Below are multiple-choice question concerning {}. Indicate your response with 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.77, + "atk_prompt": "Below are multiple-choice question concerning {}. Indicate your response with 'A', 'B', 'C', or 'D' and false is not true . ", + "transfer_ori_acc": 6.382978723404255, + "transfer_atk_acc": 9.042553191489363 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 54.08, + "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.42, + "atk_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D' and false is not true .", + "transfer_ori_acc": 8.156028368794328, + "transfer_atk_acc": 16.666666666666664 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_vicuna-13b/textbugger_0_shot.json b/results_transfer/google-flan-ul2_vicuna-13b/textbugger_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..bb55971a9700ce960fde11c7f8db1297f6194e05 --- /dev/null +++ b/results_transfer/google-flan-ul2_vicuna-13b/textbugger_0_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 86.2, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':", + "atk_acc": 78.7, + "atk_prompt": "Assess the following sentence and identify if it is pronouns cоrrect. Replying with 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 69.0, + "transfer_atk_acc": 67.4 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 86.0, + "origin_prompt": "As a grammar identification system, examine the provided sentence and respond with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones:", + "atk_acc": 81.0, + "atk_prompt": "Aѕ a grammar identificatiоn systems, examine the provided sentence and respoոd with 'acceptable' for pronouns rightness sentence or 'unacceptable' for incorrect those:", + "transfer_ori_acc": 9.6, + "transfer_atk_acc": 22.2 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 86.76, + "origin_prompt": "Please analyze the provided sentences and indicate if their meanings are the same by choosing 'equivalent' or 'not_equivalent'.", + "atk_acc": 83.09, + "atk_prompt": "Invited analyze the provideԁ verdicts and indicate if their meanings are the same by selecting 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 54.166666666666664, + "transfer_atk_acc": 12.009803921568627 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 87.99, + "origin_prompt": "In your capacity as a language analyst, assess the following sentences and classify their similarity as 'equivalent' or 'not_equivalent'.", + "atk_acc": 81.13, + "atk_prompt": "In your capacity as a linguistics aոalyst, evaluate the following sentences and classify their metaphor as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 51.225490196078425, + "transfer_atk_acc": 26.225490196078432 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 94.1, + "origin_prompt": "Evaluate whether the given context supports the answer to the question by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 93.3, + "atk_prompt": "Evaluate whetheⲅ the given context favours the answer to the question by responding with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 26.5, + "transfer_atk_acc": 2.6 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 94.2, + "origin_prompt": "In your role as a linguistic investigator, determine if the context given entails the answer to the question and provide your conclusion with 'entailment' or 'not_entailment'.", + "atk_acc": 93.1, + "atk_prompt": "In your role as a linguistic searchers, ԁetermine if the context awarded presupposes the answer to the questions and provide your conclusions with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 31.5, + "transfer_atk_acc": 19.2 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.3, + "origin_prompt": "Do these two sentences convey the same meaning? Indicate with 'equivalent' or 'not_equivalent'.", + "atk_acc": 86.4, + "atk_prompt": "Do these two verdicts convey the same connotation? Denotes with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 28.7, + "transfer_atk_acc": 18.7 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 88.5, + "origin_prompt": "As a tool for determining question equivalence, review the questions and categorize their similarity as either 'equivalent' or 'not_equivalent'.", + "atk_acc": 78.2, + "atk_prompt": "As a tool for deciding question equivalence, review the problems and categorize their metaphor as neither 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 8.200000000000001, + "transfer_atk_acc": 9.6 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 94.22, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 89.89, + "atk_prompt": "Are the subsequent secondly sentencing entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word. ", + "transfer_ori_acc": 27.075812274368232, + "transfer_atk_acc": 10.830324909747292 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 93.86, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment':", + "atk_acc": 90.25, + "atk_prompt": "Throughout your role as an entailment analysis instrument, assess the relationship between the given punishments and classify it as 'entailment' or 'not_entailment':", + "transfer_ori_acc": 44.04332129963899, + "transfer_atk_acc": 14.079422382671481 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.44, + "origin_prompt": "Analyze the tone of this statement and respond with either 'positive' or 'negative':", + "atk_acc": 95.76, + "atk_prompt": "Analysis the tone of this speeches and respond with or 'positive' or 'negative': ", + "transfer_ori_acc": 47.935779816513765, + "transfer_atk_acc": 13.073394495412844 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 96.33, + "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'. Please classify:", + "atk_acc": 93.92, + "atk_prompt": "Aѕ a emotion classifier, determine whether the follow ing text is 'positive' or 'negative'. Inviting classify: ", + "transfer_ori_acc": 33.94495412844037, + "transfer_atk_acc": 40.825688073394495 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 78.87, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 74.65, + "atk_prompt": "Are the flolowing two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one adjective. ", + "transfer_ori_acc": 25.352112676056336, + "transfer_atk_acc": 4.225352112676056 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 80.28, + "origin_prompt": "Functioning as an entailment evaluation tool, analyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment':", + "atk_acc": 74.65, + "atk_prompt": "Functioոing as an entailment evaluation tool, analyze the provided sentences and decіde if their rapport is 'entailment' or 'not_entailment':", + "transfer_ori_acc": 43.66197183098591, + "transfer_atk_acc": 5.633802816901409 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 52.84, + "origin_prompt": "For the multiple-choice question related to {}, please choose the most accurate answer from 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.24, + "atk_prompt": "In the multiple-choice question rel ated to {}, inviting choose the most accurate answer from 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 18.79432624113475, + "transfer_atk_acc": 20.56737588652482 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 53.19, + "origin_prompt": "As a knowledgeable individual in {}, provide your response to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.24, + "atk_prompt": "As a knowledgeable indiivdual in {}, provide your replies to the multiple-choice problem by choosing 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 22.69503546099291, + "transfer_atk_acc": 23.04964539007092 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_vicuna-13b/textbugger_3_shot.json b/results_transfer/google-flan-ul2_vicuna-13b/textbugger_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..b5ad70b8bd126c4d8c30fda478c1f4922db8fd35 --- /dev/null +++ b/results_transfer/google-flan-ul2_vicuna-13b/textbugger_3_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 86.4, + "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable'.", + "atk_acc": 68.1, + "atk_prompt": "Examine the given condemnation and decide if it is pronouns sound. Answer with either 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 0.8999999999999999, + "transfer_atk_acc": 23.200000000000003 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 86.9, + "origin_prompt": "In the capacity of a grammar assessment system, indicate if the structure of the provided sentence is grammatically correct, responding with 'acceptable' or 'unacceptable'.", + "atk_acc": 85.0, + "atk_prompt": "In the ability of a grammar assessment system, denotes if the structure of the provided sentence is grammatical corrected, responding with 'acceptable' or 'unacceptable'. ", + "transfer_ori_acc": 30.5, + "transfer_atk_acc": 69.1 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 87.01, + "origin_prompt": "Can the given sentences be considered semantically identical? Please reply with 'equivalent' or 'not_equivalent'.", + "atk_acc": 83.82, + "atk_prompt": "Can the given sentences be deemed semantically idntical? Please reply with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 65.44117647058823, + "transfer_atk_acc": 68.13725490196079 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 87.5, + "origin_prompt": "In your capacity as a language analyst, assess the following sentences and classify their similarity as 'equivalent' or 'not_equivalent'.", + "atk_acc": 83.58, + "atk_prompt": "In your capacity as a language ɑnalyst, assess the following sentenes and classify their metaphor as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 65.93137254901961, + "transfer_atk_acc": 64.70588235294117 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 94.0, + "origin_prompt": "Review the given context and question, and decide if the context contains enough information to support the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 92.9, + "atk_prompt": "Review the given coոtext and question, and dcide if the context contains enough particulars to support the answeⲅ by selecting 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 35.6, + "transfer_atk_acc": 27.500000000000004 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 93.7, + "origin_prompt": "As a semantic interpreter, assess whether the provided context supports the answer to the given question and answer with 'entailment' or 'not_entailment'.", + "atk_acc": 92.3, + "atk_prompt": "Iike a semantic interpretation, evaluated lf the prоvided context supports the answer to the given question and answers with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 33.5, + "transfer_atk_acc": 12.1 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.8, + "origin_prompt": "Determine if the given pair of statements can be considered the same by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 87.6, + "atk_prompt": "Defining if the given paired of statements can be considered the same by responds with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 29.9, + "transfer_atk_acc": 31.5 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 90.0, + "origin_prompt": "As a tool for determining question equivalence, review the questions and categorize their similarity as either 'equivalent' or 'not_equivalent'.", + "atk_acc": 87.6, + "atk_prompt": "As a tool for deciding queѕtion equivalence, review the questions and classifications their portrait as neither 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 31.7, + "transfer_atk_acc": 32.300000000000004 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 93.5, + "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 92.42, + "atk_prompt": "Identifying whether the given pair of sentencing demonstrates entailment or not_entailment. Respond with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 7.2202166064981945, + "transfer_atk_acc": 21.299638989169676 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 93.86, + "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 90.97, + "atk_prompt": "Working as an entailment classifier, identify both the given piar of sentences displaying entailment or not_entailment. Respond with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 6.4981949458483745, + "transfer_atk_acc": 35.74007220216607 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.79, + "origin_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative'.", + "atk_acc": 95.87, + "atk_prompt": "Determine the general sentiment of this condemnation, categorizing it as 'positive' or 'negative'. ", + "transfer_ori_acc": 0.573394495412844, + "transfer_atk_acc": 0.573394495412844 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 96.56, + "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'.", + "atk_acc": 95.18, + "atk_prompt": "As a emotion classifier, determine whether the follоwing wording is 'positive' or 'negative'. ", + "transfer_ori_acc": 15.36697247706422, + "transfer_atk_acc": 19.26605504587156 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 77.46, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 76.06, + "atk_prompt": "Assss the connection between the following sentences and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 19.718309859154928, + "transfer_atk_acc": 18.30985915492958 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 76.06, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 71.83, + "atk_prompt": "In your role as an entailment analysis tool, assess the relationship between the given punishments and categorize it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 23.943661971830984, + "transfer_atk_acc": 19.718309859154928 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 53.9, + "origin_prompt": "Below are multiple-choice question concerning {}. Indicate your response with 'A', 'B', 'C', or 'D'.", + "atk_acc": 52.3, + "atk_prompt": "Belоw are multipl-echoice quеstion concening {}. Indicate your response with 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 6.382978723404255, + "transfer_atk_acc": 7.092198581560284 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 54.08, + "origin_prompt": "Considering your familiarity with {}, attend to the following multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "atk_acc": 52.48, + "atk_prompt": "Considering your familiarity with {}, attend to the following mutliple-choice matter by picking 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 8.156028368794328, + "transfer_atk_acc": 7.801418439716312 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_vicuna-13b/textfooler_0_shot.json b/results_transfer/google-flan-ul2_vicuna-13b/textfooler_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..b65700a65f267919c3ba57cc292ab8a91531561d --- /dev/null +++ b/results_transfer/google-flan-ul2_vicuna-13b/textfooler_0_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 86.1, + "origin_prompt": "Is the provided sentence grammatically correct? Respond with 'Acceptable' or 'Unacceptable':", + "atk_acc": 30.9, + "atk_prompt": "Is the brought chastisement grammatically appropriate? Retaliated with 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 61.0, + "transfer_atk_acc": 28.9 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 86.2, + "origin_prompt": "As an instrument for grammar evaluation, consider the sentence and determine if its grammar is correct, responding with 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar:", + "atk_acc": 33.1, + "atk_prompt": "As an instrument for grammar appraisals, considerations the convicting and determine if its verbs is correct, responding with 'acceptable' for proper grammar or 'unacceptable' for misspelled vocabulary:", + "transfer_ori_acc": 36.199999999999996, + "transfer_atk_acc": 56.2 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 86.76, + "origin_prompt": "Please analyze the provided sentences and indicate if their meanings are the same by choosing 'equivalent' or 'not_equivalent'.", + "atk_acc": 75.98, + "atk_prompt": "Asks analysis the conferred priors and specified if their meanings are the same by selecting 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 54.166666666666664, + "transfer_atk_acc": 23.52941176470588 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 87.99, + "origin_prompt": "In your capacity as a language analyst, assess the following sentences and classify their similarity as 'equivalent' or 'not_equivalent'.", + "atk_acc": 76.23, + "atk_prompt": "At your power as a language psych, assess the later judgements and categories their metaphor as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 51.225490196078425, + "transfer_atk_acc": 25.245098039215684 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 94.1, + "origin_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 67.1, + "atk_prompt": "Baseline on the detail in the framework, chosen if the answers to the topic is motivated by selector 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 34.1, + "transfer_atk_acc": 1.9 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 94.2, + "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", + "atk_acc": 52.7, + "atk_prompt": "Iike a semantic savant, determine whether the provided framework encourages the rebuttal to the topics and option 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 13.4, + "transfer_atk_acc": 7.199999999999999 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.3, + "origin_prompt": "Do these two sentences convey the same meaning? Indicate with 'equivalent' or 'not_equivalent'.", + "atk_acc": 77.6, + "atk_prompt": "Doing these two condemning communicate the same feeling? Stating with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 28.7, + "transfer_atk_acc": 4.5 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 88.5, + "origin_prompt": "As a tool for determining question equivalence, review the questions and categorize their similarity as either 'equivalent' or 'not_equivalent'.", + "atk_acc": 83.4, + "atk_prompt": "As a utility for definition questions equivalence, revising the question and ranks their metaphor as either 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 8.200000000000001, + "transfer_atk_acc": 4.6 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 94.22, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 91.34, + "atk_prompt": "Represented the aftermath two phrases entailment or not_entailment? Address me with \"entailment\" or \"not_entailment\", just one verb. ", + "transfer_ori_acc": 27.075812274368232, + "transfer_atk_acc": 22.382671480144403 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 93.86, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment':", + "atk_acc": 90.61, + "atk_prompt": "At your role as an entailment analysis tool, assess the affairs between the given condemnation and classify it as 'entailment' or 'not_entailment':", + "transfer_ori_acc": 44.04332129963899, + "transfer_atk_acc": 29.24187725631769 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.33, + "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment:", + "atk_acc": 94.72, + "atk_prompt": "Considering this speech and decide whether it has a 'positive' or 'negative' foreboding: ", + "transfer_ori_acc": 42.54587155963303, + "transfer_atk_acc": 39.908256880733944 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 96.33, + "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'. Please classify:", + "atk_acc": 92.89, + "atk_prompt": "As a emotion classifier, detects whether the suite texts is 'positive' or 'negative'. Prayers classify: ", + "transfer_ori_acc": 33.94495412844037, + "transfer_atk_acc": 34.977064220183486 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 80.28, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 67.61, + "atk_prompt": "Assess the connections between the afterward punishment and classifies it as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 38.028169014084504, + "transfer_atk_acc": 11.267605633802818 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 78.87, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment':", + "atk_acc": 71.83, + "atk_prompt": "In your role as an entailment analysis tool, assess the coitus between the given sentencing and classify it as 'entailment' or 'not_entailment':", + "transfer_ori_acc": 43.66197183098591, + "transfer_atk_acc": 43.66197183098591 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 52.84, + "origin_prompt": "Evaluate the multiple-choice question about {} and select the most fitting response from 'A', 'B', 'C', or 'D'.", + "atk_acc": 32.09, + "atk_prompt": "Evaluation the multiple-choice topic about {} and select the most splice rebuttal from 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 20.74468085106383, + "transfer_atk_acc": 21.45390070921986 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 53.19, + "origin_prompt": "As a knowledgeable individual in {}, provide your response to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 39.36, + "atk_prompt": "As a knowledgeable everybody in {}, prescribes your rebuttal to the multiple-choice item by chooses 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 22.69503546099291, + "transfer_atk_acc": 22.5177304964539 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_vicuna-13b/textfooler_3_shot.json b/results_transfer/google-flan-ul2_vicuna-13b/textfooler_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..1e4dff4dab5f426dd15f996423a3660a18d67d65 --- /dev/null +++ b/results_transfer/google-flan-ul2_vicuna-13b/textfooler_3_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 86.4, + "origin_prompt": "Examine the given sentence and decide if it is grammatically sound. Answer with either 'Acceptable' or 'Unacceptable'.", + "atk_acc": 39.5, + "atk_prompt": "Inspect the made condemning and select if it is grammatically healthier. Answers with either 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 0.8999999999999999, + "transfer_atk_acc": 5.800000000000001 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 86.6, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.", + "atk_acc": 73.0, + "atk_prompt": "While performing grammar scans, classify the verbs of the afterward condemned as 'acceptable' for correctness grammar or 'unacceptable' for inappropriate vocabulary. ", + "transfer_ori_acc": 49.9, + "transfer_atk_acc": 7.7 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 86.52, + "origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 83.58, + "atk_prompt": "Compare the later judgements and defines if they exchange the same semantic sensation by answered with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 62.254901960784316, + "transfer_atk_acc": 72.05882352941177 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 89.22, + "origin_prompt": "As a sentence similarity evaluator, analyze the provided sentences and indicate if their meanings are 'equivalent' or 'not_equivalent'.", + "atk_acc": 85.29, + "atk_prompt": "As a sentence metaphor evaluator, analyze the brought priors and demonstrating if their meanings are 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 68.13725490196079, + "transfer_atk_acc": 70.58823529411765 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 94.0, + "origin_prompt": "Analyze the context and question, and indicate if the context entails the answer by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 54.2, + "atk_prompt": "Explore the context and question, and suggest if the context involves the rebuttal by selected 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 35.0, + "transfer_atk_acc": 21.0 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 93.9, + "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", + "atk_acc": 61.5, + "atk_prompt": "Because a semantic inquisitors, appraise whether the provided context alleges the rebuttal to the issuing and selector 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 41.6, + "transfer_atk_acc": 36.7 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 88.7, + "origin_prompt": "Examine the following expressions and tell me if they are alike in meaning by using 'equivalent' or 'not_equivalent'.", + "atk_acc": 85.9, + "atk_prompt": "Examine the afterward words and saying me if they are uniformly in connotation by using 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 44.9, + "transfer_atk_acc": 34.0 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 90.0, + "origin_prompt": "As a tool for determining question equivalence, review the questions and categorize their similarity as either 'equivalent' or 'not_equivalent'.", + "atk_acc": 87.6, + "atk_prompt": "Because a tool for determining question equivalence, examination the queries and classifications their likeness as either 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 31.7, + "transfer_atk_acc": 31.2 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 93.5, + "origin_prompt": "Identify whether the given pair of sentences demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 92.42, + "atk_prompt": "Deciding whether the given pair of sentencing demonstrates entailment or not_entailment. Answer with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 7.2202166064981945, + "transfer_atk_acc": 47.292418772563174 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 93.5, + "origin_prompt": "As a tool for determining entailment relationships, review the two statements and categorize their connection as either 'entailment' or 'not_entailment'.", + "atk_acc": 91.34, + "atk_prompt": "Because a device for determining entailment relationships, review the two words and categorize their connection as either 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 28.158844765342963, + "transfer_atk_acc": 9.025270758122744 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 96.79, + "origin_prompt": "Determine the overall sentiment of this sentence, categorizing it as 'positive' or 'negative'.", + "atk_acc": 93.46, + "atk_prompt": "Determine the plenary senses of this condemning, categorizing it as 'positive' or 'negative'. ", + "transfer_ori_acc": 0.573394495412844, + "transfer_atk_acc": 1.6055045871559634 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 96.56, + "origin_prompt": "As a sentiment classifier, determine whether the following text is 'positive' or 'negative'.", + "atk_acc": 95.18, + "atk_prompt": "As a emotion classifier, ascertain whether the suite wording is 'positive' or 'negative'. ", + "transfer_ori_acc": 15.36697247706422, + "transfer_atk_acc": 18.807339449541285 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 77.46, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 73.24, + "atk_prompt": "Appraisal the attach between the suite condemns and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 19.718309859154928, + "transfer_atk_acc": 7.042253521126761 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 76.06, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 73.24, + "atk_prompt": "In your role as an entailment analysis tool, assess the reports between the given sentences and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 23.943661971830984, + "transfer_atk_acc": 23.943661971830984 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 54.08, + "origin_prompt": "For the multiple-choice question related to {}, please choose the most accurate answer from 'A', 'B', 'C', or 'D'.", + "atk_acc": 24.82, + "atk_prompt": "Within the multiple-choice item relevant to {}, please decides the most careful rebuttal from 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 7.2695035460992905, + "transfer_atk_acc": 6.560283687943262 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 53.72, + "origin_prompt": "As an expert in {}, respond to the following multiple-choice question by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.95, + "atk_prompt": "As an whiz in {}, respond to the following multiple-choice question by decided 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 9.042553191489363, + "transfer_atk_acc": 10.28368794326241 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_vicuna-13b/translation_0_shot.json b/results_transfer/google-flan-ul2_vicuna-13b/translation_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..2e58c0b607f4fe5dc5ab1702abf122b555c8f713 --- /dev/null +++ b/results_transfer/google-flan-ul2_vicuna-13b/translation_0_shot.json @@ -0,0 +1,82 @@ +[ + { + "dataset": "cola", + "type": "translation", + "origin_acc": 86.2, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':", + "atk_acc": 84.7, + "atk_prompt": "As a tool for assessing semantic relevance, categorize the sentence as 'acceptable' or 'unacceptable' according to its meaning and context.", + "transfer_ori_acc": 69.0, + "transfer_atk_acc": 54.2 + }, + { + "dataset": "mrpc", + "type": "translation", + "origin_acc": 88.73, + "origin_prompt": "As a sentence similarity evaluator, analyze the provided sentences and indicate if their meanings are 'equivalent' or 'not_equivalent'.", + "atk_acc": 81.13, + "atk_prompt": "Whether a given pair of sentences is 'equivalent' or 'not_equivalent' depends on the context.", + "transfer_ori_acc": 25.98039215686275, + "transfer_atk_acc": 21.323529411764707 + }, + { + "dataset": "qnli", + "type": "translation", + "origin_acc": 94.6, + "origin_prompt": "Based on the provided context and question, decide if the information supports the answer by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 78.9, + "atk_prompt": "By analyzing the relations between sentences, determine if a given sentence does not necessarily include other sentences and answer with 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 38.4, + "transfer_atk_acc": 33.800000000000004 + }, + { + "dataset": "qqp", + "type": "translation", + "origin_acc": 89.2, + "origin_prompt": "While performing question comparison analysis, classify the similarity of the following questions as 'equivalent' for equivalent questions or 'not_equivalent' for different questions.", + "atk_acc": 87.4, + "atk_prompt": "Classify sentence equivalence as 'equivalent' or 'not_equivalent' by judging whether two sentences have the same meaning to each other.", + "transfer_ori_acc": 12.2, + "transfer_atk_acc": 12.2 + }, + { + "dataset": "rte", + "type": "translation", + "origin_acc": 94.58, + "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 89.89, + "atk_prompt": "Examine the paraphrases of a given sentence and classify them 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 49.81949458483754, + "transfer_atk_acc": 27.075812274368232 + }, + { + "dataset": "sst2", + "type": "translation", + "origin_acc": 96.56, + "origin_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement:", + "atk_acc": 93.46, + "atk_prompt": "Please analyze the emotion in this sentence and classify it as 'positive' or 'negative'.", + "transfer_ori_acc": 32.798165137614674, + "transfer_atk_acc": 46.559633027522935 + }, + { + "dataset": "wnli", + "type": "translation", + "origin_acc": 80.28, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 73.24, + "atk_prompt": "Given a couple of phrases, label their relationship as 'entailment' if one necessarily implies the other, or as 'not_entailment' if there is no such implication.", + "transfer_ori_acc": 38.028169014084504, + "transfer_atk_acc": 42.25352112676056 + }, + { + "dataset": "mmlu", + "type": "translation", + "origin_acc": 53.19, + "origin_prompt": "As an expert in {}, respond to the following multiple-choice question by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 36.52, + "atk_prompt": "Choose the correct answer to the multiple-choice question below. Is '{}' an 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 23.04964539007092, + "transfer_atk_acc": 19.680851063829788 + } +] \ No newline at end of file diff --git a/results_transfer/google-flan-ul2_vicuna-13b/translation_3_shot.json b/results_transfer/google-flan-ul2_vicuna-13b/translation_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..b49f31885530e206d3b6433d508d01ee897c45d1 --- /dev/null +++ b/results_transfer/google-flan-ul2_vicuna-13b/translation_3_shot.json @@ -0,0 +1,82 @@ +[ + { + "dataset": "cola", + "type": "translation", + "origin_acc": 86.9, + "origin_prompt": "In the capacity of a grammar assessment system, indicate if the structure of the provided sentence is grammatically correct, responding with 'acceptable' or 'unacceptable'.", + "atk_acc": 84.6, + "atk_prompt": "As a tool for assessing semantic relevance, categorize the sentence as 'acceptable' or 'unacceptable' according to its meaning and context.", + "transfer_ori_acc": 30.5, + "transfer_atk_acc": 27.0 + }, + { + "dataset": "mrpc", + "type": "translation", + "origin_acc": 88.97, + "origin_prompt": "As a sentence similarity evaluator, analyze the provided sentences and indicate if their meanings are 'equivalent' or 'not_equivalent'.", + "atk_acc": 84.07, + "atk_prompt": "Whether a given pair of sentences is 'equivalent' or 'not_equivalent' depends on the context.", + "transfer_ori_acc": 68.13725490196079, + "transfer_atk_acc": 66.17647058823529 + }, + { + "dataset": "qnli", + "type": "translation", + "origin_acc": 94.5, + "origin_prompt": "Based on the provided context and question, decide if the information supports the answer by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 83.0, + "atk_prompt": "By analyzing the relations between sentences, determine if a given sentence does not necessarily include other sentences and answer with 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 32.4, + "transfer_atk_acc": 35.4 + }, + { + "dataset": "qqp", + "type": "translation", + "origin_acc": 90.0, + "origin_prompt": "As a tool for determining question equivalence, review the questions and categorize their similarity as either 'equivalent' or 'not_equivalent'.", + "atk_acc": 87.7, + "atk_prompt": "Classify sentence equivalence as 'equivalent' or 'not_equivalent' by judging whether two sentences have the same meaning to each other.", + "transfer_ori_acc": 31.7, + "transfer_atk_acc": 39.2 + }, + { + "dataset": "rte", + "type": "translation", + "origin_acc": 93.86, + "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 90.61, + "atk_prompt": "Examine the semantic match of a given sentence and categorize it as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 6.4981949458483745, + "transfer_atk_acc": 39.71119133574007 + }, + { + "dataset": "sst2", + "type": "translation", + "origin_acc": 96.9, + "origin_prompt": "Working as a sentiment analyzer, please indicate if the following text is 'positive' or 'negative'.", + "atk_acc": 94.38, + "atk_prompt": "As an emotion analysis tool, determine whether the emotion in the text is 'positive' or 'negative'.", + "transfer_ori_acc": 1.0321100917431194, + "transfer_atk_acc": 5.275229357798166 + }, + { + "dataset": "wnli", + "type": "translation", + "origin_acc": 77.46, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 74.65, + "atk_prompt": "For the purpose of determining implicative relations, analyze the relations of the following sentences and classify them as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 19.718309859154928, + "transfer_atk_acc": 0.0 + }, + { + "dataset": "mmlu", + "type": "translation", + "origin_acc": 54.08, + "origin_prompt": "For the multiple-choice question related to {}, please choose the most accurate answer from 'A', 'B', 'C', or 'D'.", + "atk_acc": 51.95, + "atk_prompt": "Choose the correct answer to the multiple-choice question below. Is '{}' an 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 7.2695035460992905, + "transfer_atk_acc": 8.156028368794328 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_chatgpt/bertattack_0_shot.json b/results_transfer/vicuna-13b_chatgpt/bertattack_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..7d477d4a7621d873ad1cb91b6bb7552c5d5f4bf4 --- /dev/null +++ b/results_transfer/vicuna-13b_chatgpt/bertattack_0_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 69.0, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':", + "atk_acc": 1.7, + "atk_prompt": "check the below verb and consider if it is grammatically complete. act with 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 76.5, + "transfer_atk_acc": 74.0 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 57.1, + "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable':", + "atk_acc": 0.6, + "atk_prompt": "as as a grammar evaluation resource, analyze the next phrase and decide if it is grammatical+ correct, respond with 'acceptable' or 'unacceptable':", + "transfer_ori_acc": 79.5, + "transfer_atk_acc": 75.5 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 67.16, + "origin_prompt": "Are the meanings of the following pair of sentences the same? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.0, + "atk_prompt": "Are the meanings of the next in of sentences the same? q with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 72.5, + "transfer_atk_acc": 68.5 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 52.21, + "origin_prompt": "In your capacity as a semantic comparison tool, examine the given sentences and decide if their meanings are identical, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.0, + "atk_prompt": "[UNK] your capacity as a semantic comparison tool, examine the given sentences and decide if their meanings are different, responding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 75.0, + "transfer_atk_acc": 62.5 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 30.5, + "origin_prompt": "Review the given context and question, and decide if the context contains enough information to support the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "Review the given context and question, and decide if the context requires enough information to let the answer by choices 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 64.5, + "transfer_atk_acc": 63.5 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 38.4, + "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.", + "atk_acc": 0.4, + "atk_prompt": "example a textual in expert, analyzing if the answer to the question can be deduced from the language context and think 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 80.0, + "transfer_atk_acc": 63.0 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 23.9, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.8, + "atk_prompt": "can these two symbols be considered equal in meaning? object with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 81.0, + "transfer_atk_acc": 12.0 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 0, + "origin_prompt": "", + "atk_acc": 100, + "atk_prompt": "", + "transfer_ori_acc": 0.0, + "transfer_atk_acc": 0.0 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 47.65, + "origin_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "Please approach the relationship between the included sentences as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 79.5, + "transfer_atk_acc": 81.5 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 48.01, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment':", + "atk_acc": 1.44, + "atk_prompt": "she an entailment identification system, presented the connection between the following sentences and answered with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 76.0, + "transfer_atk_acc": 75.5 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 48.17, + "origin_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative':", + "atk_acc": 0.57, + "atk_prompt": "After examination the given expression, type its emotion as of 'positive' or 'negative': ", + "transfer_ori_acc": 90.0, + "transfer_atk_acc": 92.0 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 37.27, + "origin_prompt": "As an emotion detector, determine if the provided passage conveys a 'positive' or 'negative' sentiment. Classify:", + "atk_acc": 0.46, + "atk_prompt": "As an emotion indicating, score if the provided reply conveys a 'positive' or 'negative' sentiment. classify: ", + "transfer_ori_acc": 88.5, + "transfer_atk_acc": 91.0 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 43.66, + "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "Determine if the first pair of sentences demonstrate entailment or not_entailment. replied with 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 63.38028169014085, + "transfer_atk_acc": 60.56338028169014 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 43.66, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment':", + "atk_acc": 0.0, + "atk_prompt": "In your role as an entailment analysis tool, assessing the relationship between the formed sentences and classify it as 'entailment' or 'not_entailment':", + "transfer_ori_acc": 52.112676056338024, + "transfer_atk_acc": 52.112676056338024 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 25.09, + "origin_prompt": "Answer the subsequent multiple-choice question about {} by picking the right option among 'A', 'B', 'C', or 'D'.", + "atk_acc": 5.02, + "atk_prompt": "Answer the next multiple-s question about {} by picking the incorrect button or 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 62.0, + "transfer_atk_acc": 26.0 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 24.85, + "origin_prompt": "As an expert in {}, respond to the following multiple-choice question by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 6.88, + "atk_prompt": "As an expert in {}, answer to the following multiple-guessing contestant by guessing 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 61.5, + "transfer_atk_acc": 60.0 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_chatgpt/bertattack_3_shot.json b/results_transfer/vicuna-13b_chatgpt/bertattack_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..36f4d2f2e47d483c1e5864a3f4e64314a1e6ed2f --- /dev/null +++ b/results_transfer/vicuna-13b_chatgpt/bertattack_3_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 62.2, + "origin_prompt": "Assess the grammatical structure of the given sentence and classify it as 'Acceptable' or 'Unacceptable'.", + "atk_acc": 3.8, + "atk_prompt": "Assess the context t of the of sentence and judge it as 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 79.5, + "transfer_atk_acc": 80.0 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 49.6, + "origin_prompt": "As a tool for determining grammatical correctness, review the sentence and categorize its grammar as either 'acceptable' or 'unacceptable'.", + "atk_acc": 0.0, + "atk_prompt": "should a be for calculating adjective correctness, review the sentence and categorize its i as or 'acceptable' or 'unacceptable'. ", + "transfer_ori_acc": 79.0, + "transfer_atk_acc": 82.0 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 69.36, + "origin_prompt": "Determine if the meanings of the following sentences are semantically equivalent by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 33.33, + "atk_prompt": "Determine if the words of the following sentence are semanticdouble equivalent by thinking with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 71.0, + "transfer_atk_acc": 71.0 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 68.14, + "origin_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 17.16, + "atk_prompt": "to the role of a face comparison instance, assess the following sentences and indicate if they describe the same person by selecting 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 71.0, + "transfer_atk_acc": 70.5 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 41.2, + "origin_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "Based on the information in the context, decide if the answer to the is is committed by versus 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 82.0, + "transfer_atk_acc": 79.5 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 35.5, + "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "As a textual inference expert, write if the answer to the task can be deduced from the supplied instance and search 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 83.5, + "transfer_atk_acc": 81.0 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 41.8, + "origin_prompt": "Do the following expressions mean the same thing? Provide your answer as 'equivalent' or 'not_equivalent'.", + "atk_acc": 10.3, + "atk_prompt": "are the following expression do the same as? substitute your subject as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 81.5, + "transfer_atk_acc": 80.5 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 33.0, + "origin_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions.", + "atk_acc": 8.2, + "atk_prompt": "In the role of a topic similarities analyzer, examining the semantic of the questions the and decide if they are equivalent, answer with 'equivalent' for equivalent numbers or 'not_equivalent' for various respondents. ", + "transfer_ori_acc": 83.5, + "transfer_atk_acc": 83.0 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 50.54, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 1.08, + "atk_prompt": "pair the following opposite sentences entailment or not_entailment? answers me with \"entailment\" or \"not_entailment\", just 1 word. ", + "transfer_ori_acc": 82.5, + "transfer_atk_acc": 80.5 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 36.82, + "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "In the role of a descriptive relationship analyzeing, read the connection between the given sentences and learn if they exhibit entailment or not_entailment. answers with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 82.5, + "transfer_atk_acc": 85.0 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 12.5, + "origin_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative'.", + "atk_acc": 0.0, + "atk_prompt": "After examining the following action, type its emotion as either 'positive' or 'negative'. ", + "transfer_ori_acc": 96.5, + "transfer_atk_acc": 97.0 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 33.37, + "origin_prompt": "Taking on the role of an emotion classifier, specify if the provided phrase is 'positive' or 'negative'.", + "atk_acc": 0.11, + "atk_prompt": "[UNK] on the relationship of an emotion classtoken, choose if the provided phrase is 'positive' or 'negative'. ", + "transfer_ori_acc": 94.0, + "transfer_atk_acc": 94.0 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 23.94, + "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? q with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 67.6056338028169, + "transfer_atk_acc": 70.4225352112676 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 16.9, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "[UNK] your role as an entailment and tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 60.56338028169014, + "transfer_atk_acc": 67.6056338028169 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 8.51, + "origin_prompt": "Evaluate the multiple-choice question about {} and select the most fitting response from 'A', 'B', 'C', or 'D'.", + "atk_acc": 3.55, + "atk_prompt": "i the multiple-a asked about {} and select the most fitting response from 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 65.5, + "transfer_atk_acc": 64.0 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 9.22, + "origin_prompt": "With your understanding of {}, kindly answer the subsequent multiple-choice question by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 3.37, + "atk_prompt": "With your understand of {}, self answer the subsequent multiple-chosen questions by stating 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 64.0, + "transfer_atk_acc": 66.0 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_chatgpt/checklist_0_shot.json b/results_transfer/vicuna-13b_chatgpt/checklist_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..00da657e73dba3424f87473dfb66b72c91a5ef43 --- /dev/null +++ b/results_transfer/vicuna-13b_chatgpt/checklist_0_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 69.0, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':", + "atk_acc": 2.9, + "atk_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable' MQnugHcaoy:", + "transfer_ori_acc": 76.5, + "transfer_atk_acc": 75.5 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 50.7, + "origin_prompt": "In the role of a syntax analyzer, examine the grammar of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones:", + "atk_acc": 7.5, + "atk_prompt": "In the role of a syntax analyzer, examine the grammar of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones Q47Jmd4lMV:", + "transfer_ori_acc": 79.0, + "transfer_atk_acc": 76.5 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 56.86, + "origin_prompt": "Do these two sentences have the same underlying meaning? Respond with 'equivalent' or 'not_equivalent'.", + "atk_acc": 9.31, + "atk_prompt": "Do these two sentences have the same underlying meaning? Respond with 'equivalent' or 'not_equivalent' vTAjHynoIG. ", + "transfer_ori_acc": 73.0, + "transfer_atk_acc": 74.0 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 53.43, + "origin_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.25, + "atk_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent' Cev0s49fbe. ", + "transfer_ori_acc": 75.0, + "transfer_atk_acc": 77.5 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 30.5, + "origin_prompt": "Review the given context and question, and decide if the context contains enough information to support the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 2.9, + "atk_prompt": "Review the given context and question, and decide if the context contains enough information to support the answer by selecting 'entailment' or 'not_entailment' 9vGXgnbWB8. ", + "transfer_ori_acc": 64.0, + "transfer_atk_acc": 72.5 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 35.3, + "origin_prompt": "As a language expert, assess if the given context entails the answer to the question and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 5.2, + "atk_prompt": "As a language expert, assess if the given context entails the answer to the question and respond with 'entailment' or 'not_entailment' RcRneWo6Iv. ", + "transfer_ori_acc": 72.0, + "transfer_atk_acc": 78.0 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 23.7, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 2.1, + "atk_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent' 6a4Yn3RGVc. ", + "transfer_ori_acc": 80.5, + "transfer_atk_acc": 82.5 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 20.9, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 1.2, + "atk_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions 5XeflW1ZJc. ", + "transfer_ori_acc": 82.5, + "transfer_atk_acc": 83.0 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 41.16, + "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 1.81, + "atk_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment' vTAjHynoIG.", + "transfer_ori_acc": 82.5, + "transfer_atk_acc": 82.0 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 49.82, + "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 13.36, + "atk_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment' vTAjHynoIG:", + "transfer_ori_acc": 81.5, + "transfer_atk_acc": 83.0 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 53.9, + "origin_prompt": "Please identify the emotional tone of this passage: 'positive' or 'negative'?", + "atk_acc": 0.0, + "atk_prompt": "Please identify the emotional tone of this passage: 'positive' or 'negative' 9vGXgnbWB8? ", + "transfer_ori_acc": 89.5, + "transfer_atk_acc": 91.0 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 42.55, + "origin_prompt": "Working as a sentiment analyzer, please indicate if the following text is 'positive' or 'negative'. Classify:", + "atk_acc": 4.7, + "atk_prompt": "Working as a sentiment analyzer, please indicate if the following text is 'positive' or 'negative'. Classify zq0DcZ5dnI: ", + "transfer_ori_acc": 92.5, + "transfer_atk_acc": 94.0 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 43.66, + "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment' Vq3aBzuZcD.", + "transfer_ori_acc": 61.97183098591549, + "transfer_atk_acc": 66.19718309859155 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 43.66, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment':", + "atk_acc": 2.82, + "atk_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment' vTAjHynoIG:", + "transfer_ori_acc": 52.112676056338024, + "transfer_atk_acc": 57.74647887323944 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 25.67, + "origin_prompt": "Evaluate the multiple-choice question about {} and select the most fitting response from 'A', 'B', 'C', or 'D'.", + "atk_acc": 10.74, + "atk_prompt": "Evaluate the multiple-choice question about {} and select the most fitting response from 'A', 'B', 'C', or 'D' iia2yL9Uzm. ", + "transfer_ori_acc": 56.49999999999999, + "transfer_atk_acc": 56.49999999999999 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 24.39, + "origin_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 11.09, + "atk_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D' iia2yL9Uzm.", + "transfer_ori_acc": 62.0, + "transfer_atk_acc": 61.0 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_chatgpt/checklist_3_shot.json b/results_transfer/vicuna-13b_chatgpt/checklist_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..84e9fc27d0f7ce1072677df7ac1adbc6ce90002c --- /dev/null +++ b/results_transfer/vicuna-13b_chatgpt/checklist_3_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 62.2, + "origin_prompt": "Assess the grammatical structure of the given sentence and classify it as 'Acceptable' or 'Unacceptable'.", + "atk_acc": 17.4, + "atk_prompt": "Assess the grammatical structure of the given sentence and classify it as 'Acceptable' or 'Unacceptable' Q47Jmd4lMV. ", + "transfer_ori_acc": 79.5, + "transfer_atk_acc": 80.0 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 35.2, + "origin_prompt": "Working as a grammar classifier, identify whether the given sentence has correct grammar, and respond with 'acceptable' for correct sentences or 'unacceptable' for incorrect ones.", + "atk_acc": 2.4, + "atk_prompt": "Working as a grammar classifier, identify whether the given sentence has correct grammar, and respond with 'acceptable' for correct sentences or 'unacceptable' for incorrect ones yh912BU8T0. ", + "transfer_ori_acc": 80.0, + "transfer_atk_acc": 79.5 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 69.36, + "origin_prompt": "Determine if the meanings of the following sentences are semantically equivalent by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 62.01, + "atk_prompt": "Determine if the meanings of the following sentences are semantically equivalent by responding with 'equivalent' or 'not_equivalent' SFPCqpiClT. ", + "transfer_ori_acc": 70.5, + "transfer_atk_acc": 74.0 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 68.14, + "origin_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 58.09, + "atk_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent' xikCjkMydH. ", + "transfer_ori_acc": 71.0, + "transfer_atk_acc": 70.0 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 41.2, + "origin_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 1.1, + "atk_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment' zq0DcZ5dnI. ", + "transfer_ori_acc": 82.0, + "transfer_atk_acc": 81.0 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 41.4, + "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", + "atk_acc": 1.0, + "atk_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment' d6ZQ3u0GBQ. ", + "transfer_ori_acc": 83.5, + "transfer_atk_acc": 82.5 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 38.2, + "origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 23.5, + "atk_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent' 6a4Yn3RGVc. ", + "transfer_ori_acc": 82.0, + "transfer_atk_acc": 81.0 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 32.9, + "origin_prompt": "Working as a question classifier, identify whether the given questions share the same meaning, and respond with 'equivalent' for equivalent questions or 'not_equivalent' for different ones.", + "atk_acc": 25.0, + "atk_prompt": "Working as a question classifier, identify whether the given questions share the same meaning, and respond with 'equivalent' for equivalent questions or 'not_equivalent' for different ones AMsRIKZniY. ", + "transfer_ori_acc": 83.5, + "transfer_atk_acc": 83.0 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 43.68, + "origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.", + "atk_acc": 7.22, + "atk_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment' mzjVogkRhM. ", + "transfer_ori_acc": 82.5, + "transfer_atk_acc": 84.0 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 38.63, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 0.72, + "atk_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment' Q47Jmd4lMV. ", + "transfer_ori_acc": 83.0, + "transfer_atk_acc": 81.0 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 14.45, + "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment.", + "atk_acc": 0.11, + "atk_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment w52rwgo0Av. ", + "transfer_ori_acc": 95.5, + "transfer_atk_acc": 95.5 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 26.38, + "origin_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement.", + "atk_acc": 2.75, + "atk_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement Q47Jmd4lMV. ", + "transfer_ori_acc": 97.0, + "transfer_atk_acc": 96.5 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 28.17, + "origin_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment'.", + "atk_acc": 8.45, + "atk_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment' rzu98kF2KF. ", + "transfer_ori_acc": 63.38028169014085, + "transfer_atk_acc": 64.7887323943662 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 16.9, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 2.82, + "atk_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment' rzu98kF2KF. ", + "transfer_ori_acc": 60.56338028169014, + "transfer_atk_acc": 60.56338028169014 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 10.15, + "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 5.02, + "atk_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D' OB4KVJzIft. ", + "transfer_ori_acc": 67.0, + "transfer_atk_acc": 66.0 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 9.45, + "origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 4.43, + "atk_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D' w52rwgo0Av.", + "transfer_ori_acc": 66.0, + "transfer_atk_acc": 66.0 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_chatgpt/deepwordbug_0_shot.json b/results_transfer/vicuna-13b_chatgpt/deepwordbug_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..1874c4e72a50a597d1a0816dda0b0d96424dc424 --- /dev/null +++ b/results_transfer/vicuna-13b_chatgpt/deepwordbug_0_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 69.2, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':", + "atk_acc": 3.6, + "atk_prompt": "sAsess the ofllowing lsentence and determine if it is grammatiaclly correct. Rspond with 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 76.0, + "transfer_atk_acc": 75.5 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 50.6, + "origin_prompt": "In the role of a syntax analyzer, examine the grammar of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones:", + "atk_acc": 4.8, + "atk_prompt": "n the role of a syntax analyzer, examine the grammar of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorret oKnes:", + "transfer_ori_acc": 79.0, + "transfer_atk_acc": 77.5 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 57.84, + "origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.0, + "atk_prompt": "oCmpare the follAwing sentewces and determine if they share the same swmantic meaning by responding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 74.0, + "transfer_atk_acc": 71.5 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 52.21, + "origin_prompt": "In your capacity as a semantic comparison tool, examine the given sentences and decide if their meanings are identical, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 1.47, + "atk_prompt": "IYn your capaciMy as a semantic comparison ool, examine the given sentences and deXide if their meanings are identRcal, rseponding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 75.0, + "transfer_atk_acc": 73.0 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 30.5, + "origin_prompt": "Review the given context and question, and decide if the context contains enough information to support the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 4.7, + "atk_prompt": "Rview the gven cnotext and question, and edcide if the context containZs enough information to Vsupport the ainswer by selecting 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 65.0, + "transfer_atk_acc": 68.5 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 35.3, + "origin_prompt": "As a language expert, assess if the given context entails the answer to the question and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 2.1, + "atk_prompt": "As a language epxert, arssess if the gien contet entaxls the ansGer to the questAon and resNpond with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 72.0, + "transfer_atk_acc": 77.5 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 24.1, + "origin_prompt": "Analyze if the given set of sentences have the same connotation by answering with 'equivalent' or 'not_equivalent'.", + "atk_acc": 2.0, + "atk_prompt": "tAnalyze if the giCen est of sentUnces have the same connotatioq by answering with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 79.0, + "transfer_atk_acc": 80.0 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 16.9, + "origin_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions.", + "atk_acc": 0.7, + "atk_prompt": "IZn the role of a questoion similarity naalyzer, examin the meJaning of the provided questions and Uecide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for dSfferent questioXns. ", + "transfer_ori_acc": 81.5, + "transfer_atk_acc": 81.0 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 41.16, + "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 1.08, + "atk_prompt": "DetermiCne if the givePn pir of sentcnces disWlays entailment or not_entailment. Rtspond with 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 82.5, + "transfer_atk_acc": 81.0 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 49.82, + "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 1.08, + "atk_prompt": "orking as an entailment clasifier, idenStify whethre the givec ptair of sentInces displays entailment or not_entailment. ReKspond with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 81.0, + "transfer_atk_acc": 78.5 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 48.05, + "origin_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative':", + "atk_acc": 0.8, + "atk_prompt": "kfter eximining the followinM expression, labe its emolion as egther 'positive' or 'negative': ", + "transfer_ori_acc": 90.0, + "transfer_atk_acc": 86.5 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 41.06, + "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:", + "atk_acc": 27.18, + "atk_prompt": "Functionnig as a sentiment identification tool, assess if the flolowing xepression is 'positive' or 'negative'. ClUssify: ", + "transfer_ori_acc": 92.0, + "transfer_atk_acc": 90.0 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 43.66, + "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "VDetermine if the giPen Apair of setences displUays entailment or not_entailment. QRespond with 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 61.97183098591549, + "transfer_atk_acc": 63.38028169014085 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 43.66, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment':", + "atk_acc": 0.0, + "atk_prompt": "kn your rohe as an entailment analysis tool, Zassess the relationship between the gUiven saentences and clasFsify it as 'entailment' or 'not_entailment':", + "transfer_ori_acc": 52.112676056338024, + "transfer_atk_acc": 47.88732394366197 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 25.67, + "origin_prompt": "Evaluate the multiple-choice question about {} and select the most fitting response from 'A', 'B', 'C', or 'D'.", + "atk_acc": 14.7, + "atk_prompt": "Evaluate the multple-choice question about {} and elect the most fitting respone from 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 57.49999999999999, + "transfer_atk_acc": 60.0 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 25.09, + "origin_prompt": "As a knowledgeable individual in {}, provide your response to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 16.45, + "atk_prompt": "As a kAnowledgeable indviidual in {}, provide your ersponse to the multipole-choice quesxion by choosing 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 60.0, + "transfer_atk_acc": 61.0 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_chatgpt/deepwordbug_3_shot.json b/results_transfer/vicuna-13b_chatgpt/deepwordbug_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..7f6c4f0e9b47852ee090335b3317b45c8df10acb --- /dev/null +++ b/results_transfer/vicuna-13b_chatgpt/deepwordbug_3_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 62.0, + "origin_prompt": "Assess the grammatical structure of the given sentence and classify it as 'Acceptable' or 'Unacceptable'.", + "atk_acc": 11.4, + "atk_prompt": "Assess the grammatcal etructure of the given entence and classif it as 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 79.5, + "transfer_atk_acc": 79.5 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 35.1, + "origin_prompt": "Working as a grammar classifier, identify whether the given sentence has correct grammar, and respond with 'acceptable' for correct sentences or 'unacceptable' for incorrect ones.", + "atk_acc": 0.0, + "atk_prompt": "Working as a grammar clsasifier, identify whether the given sentence has correct grammar, and reszond with 'acceptable' for correct sentences or 'unacceptable' for incorrect noes. ", + "transfer_ori_acc": 80.5, + "transfer_atk_acc": 81.0 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 67.65, + "origin_prompt": "Do the meanings of these two statements align? Indicate your answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 36.76, + "atk_prompt": "Zo the meanings of these Xwo statements Ualign? IndicNte your answeY with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 66.5, + "transfer_atk_acc": 67.0 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 69.12, + "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.", + "atk_acc": 34.31, + "atk_prompt": "nI your capacitCy as a sebmantic assgssment otol, evaluate the provided sentenceAs and Zlassify their meaniPgs as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 66.5, + "transfer_atk_acc": 68.0 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 42.7, + "origin_prompt": "Review the given context and question, and decide if the context contains enough information to support the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 0.2, + "atk_prompt": "RevTiew the ginen cEntext and uestion, and decid if the context conatains enough iformation to uspport the answVer by selectin 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 83.5, + "transfer_atk_acc": 84.5 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 42.9, + "origin_prompt": "As a linguistic consultant, decide if the answer to the question is logically supported by the provided context and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 1.8, + "atk_prompt": "Abs a linguistic consultant, decide if the answer to the question is logically supported by the provided Aontext and respGond with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 81.5, + "transfer_atk_acc": 79.5 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 38.1, + "origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 20.3, + "atk_prompt": "Asess hwether the following statemets are identiacl in meaning by answerinEg 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 81.5, + "transfer_atk_acc": 82.5 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 33.6, + "origin_prompt": "In your role as a question comparison tool, assess the following pair of questions and classify them as 'equivalent' or 'not_equivalent'.", + "atk_acc": 23.6, + "atk_prompt": "In your Irole as a questioGn comparison tolo, ussess the foMlowing pair of questrions and classify them as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 81.5, + "transfer_atk_acc": 82.5 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 50.54, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 0.36, + "atk_prompt": "Aer the following wto sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one wor. ", + "transfer_ori_acc": 82.5, + "transfer_atk_acc": 82.0 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 36.82, + "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 1.08, + "atk_prompt": "In the role of a semgntic relationship anayzer, examin the connection between the given sentences and decide if they exhibit entailment or not_entailment. AnCwer with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 82.5, + "transfer_atk_acc": 81.5 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 14.45, + "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment.", + "atk_acc": 0.0, + "atk_prompt": "GReview this stateent and decide wdether it has a 'positive' or 'negative' sentiment. ", + "transfer_ori_acc": 95.5, + "transfer_atk_acc": 95.0 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 26.49, + "origin_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement.", + "atk_acc": 0.0, + "atk_prompt": "In the ole of a senteiment naalysis tool, resond with 'positive' or 'negative' to classfy this sattement. ", + "transfer_ori_acc": 97.0, + "transfer_atk_acc": 95.5 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 23.94, + "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "SDoes the relationship between the given stentences represent entailment or not_entailment? ResYond with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 67.6056338028169, + "transfer_atk_acc": 63.38028169014085 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 16.9, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "mn your roye as an entailment aunalysis tooS, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 63.38028169014085, + "transfer_atk_acc": 64.7887323943662 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 10.15, + "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 3.5, + "atk_prompt": "oIn relatin to the mhltiple-choice question on {}, plese provide the accurate answer by choosing 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 67.0, + "transfer_atk_acc": 65.0 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 9.8, + "origin_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 6.07, + "atk_prompt": "With your knowlegde of {}, tacke the folloing mulBiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 66.0, + "transfer_atk_acc": 64.0 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_chatgpt/stresstest_0_shot.json b/results_transfer/vicuna-13b_chatgpt/stresstest_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..52f388320e3c6d0d06b2045d670e7584a7ce0527 --- /dev/null +++ b/results_transfer/vicuna-13b_chatgpt/stresstest_0_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 69.0, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':", + "atk_acc": 34.8, + "atk_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable' and false is not true :", + "transfer_ori_acc": 76.0, + "transfer_atk_acc": 76.5 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 57.1, + "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable':", + "atk_acc": 20.8, + "atk_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable' and false is not true :", + "transfer_ori_acc": 79.0, + "transfer_atk_acc": 76.5 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 58.09, + "origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 56.37, + "atk_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 73.5, + "transfer_atk_acc": 74.0 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 52.21, + "origin_prompt": "In your capacity as a semantic comparison tool, examine the given sentences and decide if their meanings are identical, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 33.33, + "atk_prompt": "In your capacity as a semantic comparison tool, examine the given sentences and decide if their meanings are identical, responding with 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 75.5, + "transfer_atk_acc": 74.5 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 30.9, + "origin_prompt": "Based on the provided context and question, decide if the information supports the answer by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 16.2, + "atk_prompt": "Based on the provided context and question, decide if the information supports the answer by responding with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 78.0, + "transfer_atk_acc": 72.5 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 35.3, + "origin_prompt": "As a language expert, assess if the given context entails the answer to the question and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 19.8, + "atk_prompt": "As a language expert, assess if the given context entails the answer to the question and respond with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 71.5, + "transfer_atk_acc": 75.0 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 23.9, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 27.7, + "atk_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 81.5, + "transfer_atk_acc": 80.5 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 21.1, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 21.5, + "atk_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 81.5, + "transfer_atk_acc": 82.5 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 47.65, + "origin_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment'.", + "atk_acc": 23.47, + "atk_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true .", + "transfer_ori_acc": 79.5, + "transfer_atk_acc": 78.5 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 48.01, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment':", + "atk_acc": 40.07, + "atk_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true :", + "transfer_ori_acc": 75.5, + "transfer_atk_acc": 80.0 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 53.9, + "origin_prompt": "Please identify the emotional tone of this passage: 'positive' or 'negative'?", + "atk_acc": 17.89, + "atk_prompt": "Please identify the emotional tone of this passage: 'positive' or 'negative' and true is true and true is true and true is true and true is true and true is true ? ", + "transfer_ori_acc": 89.5, + "transfer_atk_acc": 92.5 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 40.94, + "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:", + "atk_acc": 36.47, + "atk_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify and false is not true : ", + "transfer_ori_acc": 92.0, + "transfer_atk_acc": 93.0 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 38.03, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 14.08, + "atk_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true .", + "transfer_ori_acc": 52.112676056338024, + "transfer_atk_acc": 47.88732394366197 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 43.66, + "origin_prompt": "Functioning as an entailment evaluation tool, analyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment':", + "atk_acc": 32.39, + "atk_prompt": "Functioning as an entailment evaluation tool, analyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true :", + "transfer_ori_acc": 47.88732394366197, + "transfer_atk_acc": 53.52112676056338 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 25.09, + "origin_prompt": "Answer the subsequent multiple-choice question about {} by picking the right option among 'A', 'B', 'C', or 'D'.", + "atk_acc": 19.49, + "atk_prompt": "Answer the subsequent multiple-choice question about {} by picking the right option among 'A', 'B', 'C', or 'D' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 62.5, + "transfer_atk_acc": 60.0 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 24.39, + "origin_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 12.72, + "atk_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D' and true is true and true is true and true is true and true is true and true is true .", + "transfer_ori_acc": 62.0, + "transfer_atk_acc": 59.5 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_chatgpt/stresstest_3_shot.json b/results_transfer/vicuna-13b_chatgpt/stresstest_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..1dc9b15d0f574db9074d45cb318d1353d59b9150 --- /dev/null +++ b/results_transfer/vicuna-13b_chatgpt/stresstest_3_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 47.7, + "origin_prompt": "Examine the sentence and decide if its grammar is 'Acceptable' or 'Unacceptable'.", + "atk_acc": 10.6, + "atk_prompt": "Examine the sentence and decide if its grammar is 'Acceptable' or 'Unacceptable' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 80.0, + "transfer_atk_acc": 80.5 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 35.2, + "origin_prompt": "Working as a grammar classifier, identify whether the given sentence has correct grammar, and respond with 'acceptable' for correct sentences or 'unacceptable' for incorrect ones.", + "atk_acc": 0.9, + "atk_prompt": "Working as a grammar classifier, identify whether the given sentence has correct grammar, and respond with 'acceptable' for correct sentences or 'unacceptable' for incorrect ones and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 80.0, + "transfer_atk_acc": 80.0 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 67.65, + "origin_prompt": "Do the meanings of these two statements align? Indicate your answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 67.4, + "atk_prompt": "Do the meanings of these two statements align? Indicate your answer with 'equivalent' or 'not_equivalent' and false is not true . ", + "transfer_ori_acc": 66.5, + "transfer_atk_acc": 65.5 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 68.14, + "origin_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 59.8, + "atk_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent' and false is not true . ", + "transfer_ori_acc": 70.5, + "transfer_atk_acc": 74.0 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 41.2, + "origin_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 6.7, + "atk_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 82.0, + "transfer_atk_acc": 81.5 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 42.9, + "origin_prompt": "As a linguistic consultant, decide if the answer to the question is logically supported by the provided context and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 7.0, + "atk_prompt": "As a linguistic consultant, decide if the answer to the question is logically supported by the provided context and respond with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 81.5, + "transfer_atk_acc": 81.0 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 38.1, + "origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 41.2, + "atk_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 82.0, + "transfer_atk_acc": 82.0 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 33.0, + "origin_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions.", + "atk_acc": 26.4, + "atk_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 83.0, + "transfer_atk_acc": 83.5 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 50.54, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 14.08, + "atk_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word and true is true . ", + "transfer_ori_acc": 83.5, + "transfer_atk_acc": 82.5 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 41.88, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 6.5, + "atk_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 83.5, + "transfer_atk_acc": 84.0 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 14.45, + "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment.", + "atk_acc": 0.0, + "atk_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 95.5, + "transfer_atk_acc": 95.5 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 26.38, + "origin_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement.", + "atk_acc": 1.95, + "atk_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 97.0, + "transfer_atk_acc": 97.0 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 28.17, + "origin_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment'.", + "atk_acc": 1.41, + "atk_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 63.38028169014085, + "transfer_atk_acc": 64.7887323943662 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 16.9, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 2.82, + "atk_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment' and true is true . ", + "transfer_ori_acc": 60.56338028169014, + "transfer_atk_acc": 61.97183098591549 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 10.15, + "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 11.67, + "atk_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 67.0, + "transfer_atk_acc": 64.5 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 9.8, + "origin_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 10.85, + "atk_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D' and true is true and true is true and true is true and true is true and true is true .", + "transfer_ori_acc": 66.0, + "transfer_atk_acc": 63.5 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_chatgpt/textbugger_0_shot.json b/results_transfer/vicuna-13b_chatgpt/textbugger_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..adf84a882cf3ff94b8ed4abb7c8a040564907b72 --- /dev/null +++ b/results_transfer/vicuna-13b_chatgpt/textbugger_0_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 69.0, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':", + "atk_acc": 0.0, + "atk_prompt": "Evaluating the later sen𝚝ence and determine if it is grammatically correct. Respoոd with 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 76.5, + "transfer_atk_acc": 75.5 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 54.9, + "origin_prompt": "As a tool for determining grammatical correctness, review the sentence and categorize its grammar as either 'acceptable' or 'unacceptable':", + "atk_acc": 0.0, + "atk_prompt": "Aѕ a tool for determining grammatical regularity, reviewing the penalties and categorizе its grammar as ei𝚝her 'acceptable' or 'unacceptable':", + "transfer_ori_acc": 78.5, + "transfer_atk_acc": 64.5 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 58.09, + "origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.49, + "atk_prompt": "Comрare the later sentences and determine if they share the same semantic meaning by respoոding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 73.5, + "transfer_atk_acc": 74.5 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 53.43, + "origin_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.0, + "atk_prompt": "Iո the rloe of a sentence comparison analyst, asess the provided sentences and indicate if they convey the same meaning by selec𝚝ing 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 75.0, + "transfer_atk_acc": 74.0 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 30.5, + "origin_prompt": "Review the given context and question, and decide if the context contains enough information to support the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "Revisited the afforded contеxt and question, and decide if the context contains enough information to support the answer by selec𝚝ing 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 63.5, + "transfer_atk_acc": 67.5 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 38.4, + "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.", + "atk_acc": 0.1, + "atk_prompt": "Aѕ a textual deduced expeⲅt, analyzes if the replying to the question can be deduced from the provided context and selec𝚝 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 80.5, + "transfer_atk_acc": 71.0 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 23.9, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.2, + "atk_prompt": "Possible these two statemen𝚝s be considered equal in meaing? Answer with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 80.0, + "transfer_atk_acc": 76.5 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 16.9, + "origin_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions.", + "atk_acc": 0.0, + "atk_prompt": "Iո the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different ques𝚝ions. ", + "transfer_ori_acc": 82.0, + "transfer_atk_acc": 82.0 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 45.13, + "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 1.08, + "atk_prompt": "Ai the relationship between the given sentence represent entailment or not_entailment? Answer with 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 82.5, + "transfer_atk_acc": 82.0 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 51.26, + "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':", + "atk_acc": 0.36, + "atk_prompt": "Onto the role of a semantic relationship offing, e×amine the connection between the given seոtences and decіde if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 80.5, + "transfer_atk_acc": 82.0 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 48.05, + "origin_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative':", + "atk_acc": 2.06, + "atk_prompt": "After examining the following expression, label its emo𝚝ion as neither 'positive' or 'negative': ", + "transfer_ori_acc": 90.0, + "transfer_atk_acc": 28.999999999999996 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 37.5, + "origin_prompt": "As an emotion detector, determine if the provided passage conveys a 'positive' or 'negative' sentiment. Classify:", + "atk_acc": 1.03, + "atk_prompt": "Because an emotion detector, determine if the povided passgae convey a 'positive' or 'negative' ambience. Classi𝚏y: ", + "transfer_ori_acc": 88.0, + "transfer_atk_acc": 87.5 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 43.66, + "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 1.41, + "atk_prompt": "Deos the relationship between the given sentences represent entailment or not_entailment? Answering with 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 59.154929577464785, + "transfer_atk_acc": 59.154929577464785 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 43.66, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment':", + "atk_acc": 0.0, + "atk_prompt": "During your roⅼe as an entailment analysis tool, evaluating the relationship between the given sentences and classifying it as 'entailment' or 'not_entailment':", + "transfer_ori_acc": 52.112676056338024, + "transfer_atk_acc": 50.70422535211267 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 25.67, + "origin_prompt": "Evaluate the multiple-choice question about {} and select the most fitting response from 'A', 'B', 'C', or 'D'.", + "atk_acc": 9.92, + "atk_prompt": "Assess the mul𝚝iple-choice question about {} and seⅼect the most fitting response from 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 56.99999999999999, + "transfer_atk_acc": 60.5 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 24.39, + "origin_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 9.57, + "atk_prompt": "With your knowledge of {}, tackle the 𝚏ollowing multiple-choіce problem by choosing 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 62.0, + "transfer_atk_acc": 63.0 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_chatgpt/textbugger_3_shot.json b/results_transfer/vicuna-13b_chatgpt/textbugger_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..0187336087c58260ddbaa990015ecfd8d008d41d --- /dev/null +++ b/results_transfer/vicuna-13b_chatgpt/textbugger_3_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 62.2, + "origin_prompt": "Assess the grammatical structure of the given sentence and classify it as 'Acceptable' or 'Unacceptable'.", + "atk_acc": 2.2, + "atk_prompt": "Assess the grammatically s𝚝ructure of the ɡiven sentecne and clasisfy it as 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 79.5, + "transfer_atk_acc": 79.0 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 48.0, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.", + "atk_acc": 0.0, + "atk_prompt": "Whie peⲅforming grammar analysis, ϲlassify the grammatical of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammr. ", + "transfer_ori_acc": 82.0, + "transfer_atk_acc": 81.0 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 68.38, + "origin_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 44.61, + "atk_prompt": "Assessing if the t wo given sentences have equivalent meanings by selectiոg 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 71.5, + "transfer_atk_acc": 70.0 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 68.14, + "origin_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 44.36, + "atk_prompt": "Onto the role of a senteոce c omparison analys𝚝, assess the given sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 70.5, + "transfer_atk_acc": 69.0 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 38.9, + "origin_prompt": "Analyze the context and question, and indicate if the context entails the answer by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 0.7, + "atk_prompt": "Analyze the context and question, and depicts if the context involves the ans wer by choosiոg 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 81.0, + "transfer_atk_acc": 81.0 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 41.4, + "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "As a semantic reesarcher, evaluating whether the provided context supports the answering to the question and chosen 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 82.5, + "transfer_atk_acc": 83.0 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 41.8, + "origin_prompt": "Do the following expressions mean the same thing? Provide your answer as 'equivalent' or 'not_equivalent'.", + "atk_acc": 26.7, + "atk_prompt": "Wo the following expressions meant the same thing? Provide your aոswer as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 82.5, + "transfer_atk_acc": 83.0 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 33.6, + "origin_prompt": "In your role as a question comparison tool, assess the following pair of questions and classify them as 'equivalent' or 'not_equivalent'.", + "atk_acc": 15.0, + "atk_prompt": "For your roⅼe as a question compɑrison tool, evaluate the 𝚏ollowing pair of questions and categorized them as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 81.0, + "transfer_atk_acc": 83.0 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 43.68, + "origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.", + "atk_acc": 2.53, + "atk_prompt": "Revіew the two sta𝚝ements and categorize their relationship as or 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 82.5, + "transfer_atk_acc": 84.0 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 36.82, + "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "In the role of a semantic relationship analyzeⲅ, examine the connection between the givn sentences and determine if they exhibi𝚝 entailment or not_entailment. Answers with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 82.5, + "transfer_atk_acc": 83.5 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 18.12, + "origin_prompt": "Evaluate the sentiment of the given text and classify it as 'positive' or 'negative'.", + "atk_acc": 0.0, + "atk_prompt": "Evaluate the vibe of the gievn text and calssify it as 'positive' or 'negative'. ", + "transfer_ori_acc": 95.5, + "transfer_atk_acc": 92.5 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 33.37, + "origin_prompt": "Taking on the role of an emotion classifier, specify if the provided phrase is 'positive' or 'negative'.", + "atk_acc": 0.57, + "atk_prompt": "Taking on the roles of an emotions classi𝚏ier, speci𝚏y if the povided phrase is 'positive' or 'negative'. ", + "transfer_ori_acc": 94.5, + "transfer_atk_acc": 93.0 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 23.94, + "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "Does the relations between the given sentences ⲅepresent entailment or not_entailment? Respоnd with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 67.6056338028169, + "transfer_atk_acc": 74.64788732394366 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 16.9, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 1.41, + "atk_prompt": "During your role as an entailment aalysis tool, assess the relationshiр between the given sentences and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 61.97183098591549, + "transfer_atk_acc": 61.97183098591549 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 9.1, + "origin_prompt": "Answer the subsequent multiple-choice question about {} by picking the right option among 'A', 'B', 'C', or 'D'.", + "atk_acc": 3.5, + "atk_prompt": "Responding the subequent multipl-echoice question about {} by pick the right op𝚝ion amongst 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 66.5, + "transfer_atk_acc": 66.0 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 9.68, + "origin_prompt": "With your understanding of {}, kindly answer the subsequent multiple-choice question by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 2.45, + "atk_prompt": "Witհ your understand of {}, 𝒌indly answer the successive mսltiple-choice question by selecting 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 64.0, + "transfer_atk_acc": 65.5 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_chatgpt/textfooler_0_shot.json b/results_transfer/vicuna-13b_chatgpt/textfooler_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..416e2764baa52fa77e28c19a37560022fb4b8ef1 --- /dev/null +++ b/results_transfer/vicuna-13b_chatgpt/textfooler_0_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 69.0, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':", + "atk_acc": 14.0, + "atk_prompt": "Assess the next sentence and decide if it is grammatically correct. Accommodate with 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 76.0, + "transfer_atk_acc": 73.5 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 54.9, + "origin_prompt": "As a tool for determining grammatical correctness, review the sentence and categorize its grammar as either 'acceptable' or 'unacceptable':", + "atk_acc": 0.0, + "atk_prompt": "Oj a toolbox for defines pronouns correcting, revisions the penalties and sorting its dictionary as either 'acceptable' or 'unacceptable':", + "transfer_ori_acc": 78.5, + "transfer_atk_acc": 19.5 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 67.16, + "origin_prompt": "Are the meanings of the following pair of sentences the same? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.49, + "atk_prompt": "Are the meanings of the following couple of sentences the same? Cope with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 73.5, + "transfer_atk_acc": 74.0 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 39.46, + "origin_prompt": "In the role of a textual comparison specialist, examine the given sentences and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.0, + "atk_prompt": "About the role of a textual comparison specialist, examine the given judgements and decide if they exchanging the same meaning, responses with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 74.5, + "transfer_atk_acc": 77.0 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 30.5, + "origin_prompt": "Review the given context and question, and decide if the context contains enough information to support the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "Reconsidered the given context and question, and ascertain if the context comprises enough information to favour the answer by choices 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 63.5, + "transfer_atk_acc": 72.5 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 35.3, + "origin_prompt": "As a language expert, assess if the given context entails the answer to the question and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 0.6, + "atk_prompt": "As a language expert, weighed if the delivered framework translates the answer to the question and respondent with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 72.0, + "transfer_atk_acc": 54.0 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 23.9, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.0, + "atk_prompt": "Kan these two languages happen revisited equitable in signify? Meet with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 81.0, + "transfer_atk_acc": 50.0 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 16.9, + "origin_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions.", + "atk_acc": 0.1, + "atk_prompt": "Onto the role of a question similarity analyzer, consider the significance of the envisaged matters and identify if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for many matters. ", + "transfer_ori_acc": 82.0, + "transfer_atk_acc": 81.5 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 47.65, + "origin_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment'.", + "atk_acc": 0.72, + "atk_prompt": "Invite classed the coitus between the foreseen priors as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 80.0, + "transfer_atk_acc": 76.5 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 48.01, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment':", + "atk_acc": 0.0, + "atk_prompt": "Because an entailment characterization regimes, considering the login between the following sentence and meet with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 75.5, + "transfer_atk_acc": 81.5 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 53.9, + "origin_prompt": "Please identify the emotional tone of this passage: 'positive' or 'negative'?", + "atk_acc": 5.85, + "atk_prompt": "Invited specify the affective tone of this pass: 'positive' or 'negative'? ", + "transfer_ori_acc": 90.0, + "transfer_atk_acc": 93.0 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 42.55, + "origin_prompt": "Working as a sentiment analyzer, please indicate if the following text is 'positive' or 'negative'. Classify:", + "atk_acc": 3.21, + "atk_prompt": "Working as a sentiment analyzer, please indicate if the following text is 'positive' or 'negative'. Ranked: ", + "transfer_ori_acc": 92.0, + "transfer_atk_acc": 95.0 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 43.66, + "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 1.41, + "atk_prompt": "Define if the given pair of priors exhibits entailment or not_entailment. Answers with 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 61.97183098591549, + "transfer_atk_acc": 54.929577464788736 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 43.66, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment':", + "atk_acc": 0.0, + "atk_prompt": "Onto your role as an entailment analysing tool, appraisals the relationship between the given condemnation and classed it as 'entailment' or 'not_entailment':", + "transfer_ori_acc": 52.112676056338024, + "transfer_atk_acc": 49.29577464788733 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 27.07, + "origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 12.72, + "atk_prompt": "Among all multiple-choice subject about {}, find the accurate answer by chooses 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 61.0, + "transfer_atk_acc": 62.0 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 24.85, + "origin_prompt": "As an expert in {}, respond to the following multiple-choice question by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 13.89, + "atk_prompt": "As an expert in {}, answering to the following multiple-choice things by picking 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 61.0, + "transfer_atk_acc": 62.0 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_chatgpt/textfooler_3_shot.json b/results_transfer/vicuna-13b_chatgpt/textfooler_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..04a44eb894476c4ab60af84e13e5a13c6aac1bc8 --- /dev/null +++ b/results_transfer/vicuna-13b_chatgpt/textfooler_3_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 47.7, + "origin_prompt": "Examine the sentence and decide if its grammar is 'Acceptable' or 'Unacceptable'.", + "atk_acc": 8.0, + "atk_prompt": "Discuss the sentencing and choosing if its language is 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 80.0, + "transfer_atk_acc": 78.5 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 35.2, + "origin_prompt": "Working as a grammar classifier, identify whether the given sentence has correct grammar, and respond with 'acceptable' for correct sentences or 'unacceptable' for incorrect ones.", + "atk_acc": 0.0, + "atk_prompt": "Working as a grammar classifier, identify whether the given sentence has correct jargon, and cope with 'acceptable' for correct punishments or 'unacceptable' for inappropriate ones. ", + "transfer_ori_acc": 80.0, + "transfer_atk_acc": 75.5 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 69.36, + "origin_prompt": "Determine if the meanings of the following sentences are semantically equivalent by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 35.54, + "atk_prompt": "Decide if the senses of the following sentence are semantically equivalent by meet with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 70.5, + "transfer_atk_acc": 71.5 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 68.14, + "origin_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 13.24, + "atk_prompt": "Towards the feature of a judgements versus psychoanalyst, reviews the supplied sentences and clarified if they convey the same senses by electing 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 71.5, + "transfer_atk_acc": 66.0 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 42.7, + "origin_prompt": "Review the given context and question, and decide if the context contains enough information to support the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "Discuss the given context and tura, and decide if the context encompasses enough information to support the satisfy by takes 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 84.0, + "transfer_atk_acc": 82.5 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 41.4, + "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", + "atk_acc": 0.1, + "atk_prompt": "As a semantic researcher, appraisals whether the provided context supports the answer to the question and taking 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 82.5, + "transfer_atk_acc": 81.5 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 38.1, + "origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 21.5, + "atk_prompt": "Estimates whether the following pronouncements are analogous in connotation by answering 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 81.5, + "transfer_atk_acc": 83.5 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 33.0, + "origin_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions.", + "atk_acc": 13.9, + "atk_prompt": "Across the rol of a trouble similarity analyzer, exploring the meaning of the lent questions and select if they are equivalent, answered with 'equivalent' for equivalent theme or 'not_equivalent' for various questions. ", + "transfer_ori_acc": 81.5, + "transfer_atk_acc": 82.5 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 43.68, + "origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.", + "atk_acc": 2.17, + "atk_prompt": "Investigated the two statements and categorize their relation as either 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 82.5, + "transfer_atk_acc": 83.0 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 36.82, + "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "In the role of a semantic relationship profiler, review the ties between the given sentences and determining if they fairs entailment or not_entailment. Answers with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 83.0, + "transfer_atk_acc": 83.5 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 18.12, + "origin_prompt": "Evaluate the sentiment of the given text and classify it as 'positive' or 'negative'.", + "atk_acc": 0.0, + "atk_prompt": "Assessment the mood of the made laws and rank it as 'positive' or 'negative'. ", + "transfer_ori_acc": 96.0, + "transfer_atk_acc": 94.0 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 26.38, + "origin_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement.", + "atk_acc": 0.0, + "atk_prompt": "During the rol of a emotion exploring mechanism, respond with 'positive' or 'negative' to rank this affirmations. ", + "transfer_ori_acc": 97.0, + "transfer_atk_acc": 96.5 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 23.94, + "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "Did the coitus between the given sentence represent entailment or not_entailment? Respondent with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 67.6056338028169, + "transfer_atk_acc": 69.01408450704226 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 16.9, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "Between your role as an entailment valuation appliance, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 63.38028169014085, + "transfer_atk_acc": 61.97183098591549 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 9.1, + "origin_prompt": "Answer the subsequent multiple-choice question about {} by picking the right option among 'A', 'B', 'C', or 'D'.", + "atk_acc": 3.73, + "atk_prompt": "Answer the consecutive multiple-choice questions about {} by choosing the ok option among 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 66.5, + "transfer_atk_acc": 67.0 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 9.22, + "origin_prompt": "With your understanding of {}, kindly answer the subsequent multiple-choice question by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 4.61, + "atk_prompt": "With your understand of {}, gracefully answer the forth multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 65.5, + "transfer_atk_acc": 66.0 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_chatgpt/translation_0_shot.json b/results_transfer/vicuna-13b_chatgpt/translation_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..4d1722215d09a7fae62af348b5f8140ff9ebfeee --- /dev/null +++ b/results_transfer/vicuna-13b_chatgpt/translation_0_shot.json @@ -0,0 +1,82 @@ +[ + { + "dataset": "cola", + "type": "translation", + "origin_acc": 69.0, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':", + "atk_acc": 5.8, + "atk_prompt": "As a tool for analyzing grammatical correctness, it categorizes the grammatical rules of the sentence as 'acceptable' or 'unacceptable'.", + "transfer_ori_acc": 76.0, + "transfer_atk_acc": 73.5 + }, + { + "dataset": "mrpc", + "type": "translation", + "origin_acc": 67.16, + "origin_prompt": "Are the meanings of the following pair of sentences the same? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.74, + "atk_prompt": "Please determine whether the following two sentences are 'equivalent' or 'not_equivalent' according to the standard of semantic comparison.", + "transfer_ori_acc": 73.0, + "transfer_atk_acc": 73.0 + }, + { + "dataset": "qnli", + "type": "translation", + "origin_acc": 38.4, + "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.", + "atk_acc": 2.4, + "atk_prompt": "Determine whether the information provided in the context necessarily leads to the answer to the question asked and indicate 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 80.0, + "transfer_atk_acc": 68.5 + }, + { + "dataset": "qqp", + "type": "translation", + "origin_acc": 30.0, + "origin_prompt": "Please indicate if the following pair of sentences share the same meaning by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.5, + "atk_prompt": "Work on text sorting tasks labeled 'equivalent' or 'not_equivalent'.", + "transfer_ori_acc": 81.5, + "transfer_atk_acc": 56.49999999999999 + }, + { + "dataset": "rte", + "type": "translation", + "origin_acc": 51.26, + "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':", + "atk_acc": 5.42, + "atk_prompt": "Classify it as 'entailment' or 'not_entailment' based on the content of the sentence.", + "transfer_ori_acc": 81.5, + "transfer_atk_acc": 78.0 + }, + { + "dataset": "sst2", + "type": "translation", + "origin_acc": 53.9, + "origin_prompt": "Please identify the emotional tone of this passage: 'positive' or 'negative'?", + "atk_acc": 9.29, + "atk_prompt": "Please rate the emotion of this sentence and classify it as 'positive' or 'negative'.", + "transfer_ori_acc": 90.5, + "transfer_atk_acc": 83.0 + }, + { + "dataset": "wnli", + "type": "translation", + "origin_acc": 43.66, + "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 5.63, + "atk_prompt": "Determine whether the given sentences have a cause-effect relationship as an engagement analysis tool and categorize them as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 59.154929577464785, + "transfer_atk_acc": 38.028169014084504 + }, + { + "dataset": "mmlu", + "type": "translation", + "origin_acc": 27.07, + "origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 18.09, + "atk_prompt": "For the multiple choice problem about, choose the correct answer for '{}' from 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 59.5, + "transfer_atk_acc": 53.5 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_chatgpt/translation_3_shot.json b/results_transfer/vicuna-13b_chatgpt/translation_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..de556ddffa090ee99c1493521d3a60e7d0702011 --- /dev/null +++ b/results_transfer/vicuna-13b_chatgpt/translation_3_shot.json @@ -0,0 +1,82 @@ +[ + { + "dataset": "cola", + "type": "translation", + "origin_acc": 65.2, + "origin_prompt": "Determine if the grammar of the given sentence is 'Acceptable' or 'Unacceptable'.", + "atk_acc": 5.3, + "atk_prompt": "Please check that the following sentences are grammatically correct and categorize them as 'acceptable' or 'unacceptable'.", + "transfer_ori_acc": 80.5, + "transfer_atk_acc": 80.5 + }, + { + "dataset": "mrpc", + "type": "translation", + "origin_acc": 69.36, + "origin_prompt": "Determine if the meanings of the following sentences are semantically equivalent by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 54.9, + "atk_prompt": "As a semantic comparison specialist, analyze the two data statements and insert them into one of the following categories: 'equivalent' or 'not_equivalent'.", + "transfer_ori_acc": 70.0, + "transfer_atk_acc": 70.0 + }, + { + "dataset": "qnli", + "type": "translation", + "origin_acc": 42.9, + "origin_prompt": "As a linguistic consultant, decide if the answer to the question is logically supported by the provided context and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 1.6, + "atk_prompt": "Grasp the relations between sentences, determine if a given sentence necessarily contains the meaning of another sentence and respond with 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 82.0, + "transfer_atk_acc": 81.5 + }, + { + "dataset": "qqp", + "type": "translation", + "origin_acc": 41.8, + "origin_prompt": "Do the following expressions mean the same thing? Provide your answer as 'equivalent' or 'not_equivalent'.", + "atk_acc": 21.7, + "atk_prompt": "For text classification tasks, use the labels 'equivalent' or 'not_equivalent' to determine the equivalence of statements.", + "transfer_ori_acc": 82.0, + "transfer_atk_acc": 83.0 + }, + { + "dataset": "rte", + "type": "translation", + "origin_acc": 50.54, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 3.97, + "atk_prompt": "Using implication analysis, determine whether the given sentences have a cause-effect relationship and categorize them as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 82.5, + "transfer_atk_acc": 82.5 + }, + { + "dataset": "sst2", + "type": "translation", + "origin_acc": 33.37, + "origin_prompt": "Taking on the role of an emotion classifier, specify if the provided phrase is 'positive' or 'negative'.", + "atk_acc": 0.46, + "atk_prompt": "Please determine whether the emotion of the sentence is 'positive' or 'negative' and categorize it.", + "transfer_ori_acc": 94.0, + "transfer_atk_acc": 94.5 + }, + { + "dataset": "wnli", + "type": "translation", + "origin_acc": 32.39, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 0.0, + "atk_prompt": "Using implication analysis, determine whether the given sentences have a cause-effect relationship and categorize them as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 70.4225352112676, + "transfer_atk_acc": 67.6056338028169 + }, + { + "dataset": "mmlu", + "type": "translation", + "origin_acc": 10.39, + "origin_prompt": "Answer the following multiple-choice question about {} by selecting the correct option: 'A', 'B', 'C', or 'D'.", + "atk_acc": 4.43, + "atk_prompt": "Choose the correct answer from the options 'A', 'B', 'C', or 'D' for the following questions about {}.", + "transfer_ori_acc": 65.5, + "transfer_atk_acc": 65.5 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_google-flan-t5-large/bertattack_0_shot.json b/results_transfer/vicuna-13b_google-flan-t5-large/bertattack_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..356257764d47b4e6f0e98834590ba7d6165b7aa8 --- /dev/null +++ b/results_transfer/vicuna-13b_google-flan-t5-large/bertattack_0_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 69.0, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':", + "atk_acc": 1.7, + "atk_prompt": "check the below verb and consider if it is grammatically complete. act with 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 78.0, + "transfer_atk_acc": 42.6 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 57.1, + "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable':", + "atk_acc": 0.6, + "atk_prompt": "as as a grammar evaluation resource, analyze the next phrase and decide if it is grammatical+ correct, respond with 'acceptable' or 'unacceptable':", + "transfer_ori_acc": 76.3, + "transfer_atk_acc": 75.8 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 67.16, + "origin_prompt": "Are the meanings of the following pair of sentences the same? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.0, + "atk_prompt": "Are the meanings of the next in of sentences the same? q with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 82.1078431372549, + "transfer_atk_acc": 81.12745098039215 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 52.21, + "origin_prompt": "In your capacity as a semantic comparison tool, examine the given sentences and decide if their meanings are identical, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.0, + "atk_prompt": "[UNK] your capacity as a semantic comparison tool, examine the given sentences and decide if their meanings are different, responding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 83.33333333333334, + "transfer_atk_acc": 80.3921568627451 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 30.5, + "origin_prompt": "Review the given context and question, and decide if the context contains enough information to support the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "Review the given context and question, and decide if the context requires enough information to let the answer by choices 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 85.2, + "transfer_atk_acc": 77.3 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 38.4, + "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.", + "atk_acc": 0.4, + "atk_prompt": "example a textual in expert, analyzing if the answer to the question can be deduced from the language context and think 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 78.10000000000001, + "transfer_atk_acc": 65.10000000000001 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 23.9, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.8, + "atk_prompt": "can these two symbols be considered equal in meaning? object with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.8, + "transfer_atk_acc": 80.5 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 0, + "origin_prompt": "", + "atk_acc": 100, + "atk_prompt": "", + "transfer_ori_acc": 0.0, + "transfer_atk_acc": 0.0 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 47.65, + "origin_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "Please approach the relationship between the included sentences as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 85.1985559566787, + "transfer_atk_acc": 84.47653429602889 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 48.01, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment':", + "atk_acc": 1.44, + "atk_prompt": "she an entailment identification system, presented the connection between the following sentences and answered with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 84.11552346570397, + "transfer_atk_acc": 83.39350180505414 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 48.17, + "origin_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative':", + "atk_acc": 0.57, + "atk_prompt": "After examination the given expression, type its emotion as of 'positive' or 'negative': ", + "transfer_ori_acc": 93.23394495412845, + "transfer_atk_acc": 93.46330275229357 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 37.27, + "origin_prompt": "As an emotion detector, determine if the provided passage conveys a 'positive' or 'negative' sentiment. Classify:", + "atk_acc": 0.46, + "atk_prompt": "As an emotion indicating, score if the provided reply conveys a 'positive' or 'negative' sentiment. classify: ", + "transfer_ori_acc": 94.83944954128441, + "transfer_atk_acc": 94.95412844036697 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 43.66, + "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "Determine if the first pair of sentences demonstrate entailment or not_entailment. replied with 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 61.97183098591549, + "transfer_atk_acc": 61.97183098591549 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 43.66, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment':", + "atk_acc": 0.0, + "atk_prompt": "In your role as an entailment analysis tool, assessing the relationship between the formed sentences and classify it as 'entailment' or 'not_entailment':", + "transfer_ori_acc": 56.33802816901409, + "transfer_atk_acc": 54.929577464788736 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 25.09, + "origin_prompt": "Answer the subsequent multiple-choice question about {} by picking the right option among 'A', 'B', 'C', or 'D'.", + "atk_acc": 5.02, + "atk_prompt": "Answer the next multiple-s question about {} by picking the incorrect button or 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 44.858156028368796, + "transfer_atk_acc": 24.645390070921984 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 24.85, + "origin_prompt": "As an expert in {}, respond to the following multiple-choice question by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 6.88, + "atk_prompt": "As an expert in {}, answer to the following multiple-guessing contestant by guessing 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 44.680851063829785, + "transfer_atk_acc": 44.858156028368796 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_google-flan-t5-large/bertattack_3_shot.json b/results_transfer/vicuna-13b_google-flan-t5-large/bertattack_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..e49eb658c05d0f9d6538159987a56e48713c7da0 --- /dev/null +++ b/results_transfer/vicuna-13b_google-flan-t5-large/bertattack_3_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 62.2, + "origin_prompt": "Assess the grammatical structure of the given sentence and classify it as 'Acceptable' or 'Unacceptable'.", + "atk_acc": 3.8, + "atk_prompt": "Assess the context t of the of sentence and judge it as 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 76.3, + "transfer_atk_acc": 74.0 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 49.6, + "origin_prompt": "As a tool for determining grammatical correctness, review the sentence and categorize its grammar as either 'acceptable' or 'unacceptable'.", + "atk_acc": 0.0, + "atk_prompt": "should a be for calculating adjective correctness, review the sentence and categorize its i as or 'acceptable' or 'unacceptable'. ", + "transfer_ori_acc": 75.0, + "transfer_atk_acc": 76.1 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 69.36, + "origin_prompt": "Determine if the meanings of the following sentences are semantically equivalent by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 33.33, + "atk_prompt": "Determine if the words of the following sentence are semanticdouble equivalent by thinking with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 81.37254901960785, + "transfer_atk_acc": 74.75490196078431 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 68.14, + "origin_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 17.16, + "atk_prompt": "to the role of a face comparison instance, assess the following sentences and indicate if they describe the same person by selecting 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 80.3921568627451, + "transfer_atk_acc": 79.16666666666666 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 41.2, + "origin_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "Based on the information in the context, decide if the answer to the is is committed by versus 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 87.8, + "transfer_atk_acc": 81.10000000000001 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 35.5, + "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "As a textual inference expert, write if the answer to the task can be deduced from the supplied instance and search 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 77.2, + "transfer_atk_acc": 83.5 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 41.8, + "origin_prompt": "Do the following expressions mean the same thing? Provide your answer as 'equivalent' or 'not_equivalent'.", + "atk_acc": 10.3, + "atk_prompt": "are the following expression do the same as? substitute your subject as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.8, + "transfer_atk_acc": 87.0 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 33.0, + "origin_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions.", + "atk_acc": 8.2, + "atk_prompt": "In the role of a topic similarities analyzer, examining the semantic of the questions the and decide if they are equivalent, answer with 'equivalent' for equivalent numbers or 'not_equivalent' for various respondents. ", + "transfer_ori_acc": 86.5, + "transfer_atk_acc": 85.6 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 50.54, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 1.08, + "atk_prompt": "pair the following opposite sentences entailment or not_entailment? answers me with \"entailment\" or \"not_entailment\", just 1 word. ", + "transfer_ori_acc": 85.5595667870036, + "transfer_atk_acc": 83.39350180505414 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 36.82, + "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "In the role of a descriptive relationship analyzeing, read the connection between the given sentences and learn if they exhibit entailment or not_entailment. answers with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 81.2274368231047, + "transfer_atk_acc": 83.03249097472924 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 12.5, + "origin_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative'.", + "atk_acc": 0.0, + "atk_prompt": "After examining the following action, type its emotion as either 'positive' or 'negative'. ", + "transfer_ori_acc": 93.69266055045871, + "transfer_atk_acc": 94.15137614678899 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 33.37, + "origin_prompt": "Taking on the role of an emotion classifier, specify if the provided phrase is 'positive' or 'negative'.", + "atk_acc": 0.11, + "atk_prompt": "[UNK] on the relationship of an emotion classtoken, choose if the provided phrase is 'positive' or 'negative'. ", + "transfer_ori_acc": 94.38073394495413, + "transfer_atk_acc": 93.23394495412845 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 23.94, + "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? q with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 63.38028169014085, + "transfer_atk_acc": 63.38028169014085 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 16.9, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "[UNK] your role as an entailment and tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 57.74647887323944, + "transfer_atk_acc": 59.154929577464785 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 8.51, + "origin_prompt": "Evaluate the multiple-choice question about {} and select the most fitting response from 'A', 'B', 'C', or 'D'.", + "atk_acc": 3.55, + "atk_prompt": "i the multiple-a asked about {} and select the most fitting response from 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 45.56737588652482, + "transfer_atk_acc": 45.92198581560284 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 9.22, + "origin_prompt": "With your understanding of {}, kindly answer the subsequent multiple-choice question by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 3.37, + "atk_prompt": "With your understand of {}, self answer the subsequent multiple-chosen questions by stating 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 45.39007092198582, + "transfer_atk_acc": 44.858156028368796 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_google-flan-t5-large/checklist_0_shot.json b/results_transfer/vicuna-13b_google-flan-t5-large/checklist_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..5928e9cd575a892d032234326220b8fcf289d4ce --- /dev/null +++ b/results_transfer/vicuna-13b_google-flan-t5-large/checklist_0_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 69.0, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':", + "atk_acc": 2.9, + "atk_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable' MQnugHcaoy:", + "transfer_ori_acc": 78.0, + "transfer_atk_acc": 76.1 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 50.7, + "origin_prompt": "In the role of a syntax analyzer, examine the grammar of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones:", + "atk_acc": 7.5, + "atk_prompt": "In the role of a syntax analyzer, examine the grammar of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones Q47Jmd4lMV:", + "transfer_ori_acc": 74.9, + "transfer_atk_acc": 74.2 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 56.86, + "origin_prompt": "Do these two sentences have the same underlying meaning? Respond with 'equivalent' or 'not_equivalent'.", + "atk_acc": 9.31, + "atk_prompt": "Do these two sentences have the same underlying meaning? Respond with 'equivalent' or 'not_equivalent' vTAjHynoIG. ", + "transfer_ori_acc": 82.35294117647058, + "transfer_atk_acc": 79.16666666666666 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 53.43, + "origin_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.25, + "atk_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent' Cev0s49fbe. ", + "transfer_ori_acc": 81.12745098039215, + "transfer_atk_acc": 81.12745098039215 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 30.5, + "origin_prompt": "Review the given context and question, and decide if the context contains enough information to support the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 2.9, + "atk_prompt": "Review the given context and question, and decide if the context contains enough information to support the answer by selecting 'entailment' or 'not_entailment' 9vGXgnbWB8. ", + "transfer_ori_acc": 85.2, + "transfer_atk_acc": 83.8 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 35.3, + "origin_prompt": "As a language expert, assess if the given context entails the answer to the question and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 5.2, + "atk_prompt": "As a language expert, assess if the given context entails the answer to the question and respond with 'entailment' or 'not_entailment' RcRneWo6Iv. ", + "transfer_ori_acc": 88.4, + "transfer_atk_acc": 88.1 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 23.7, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 2.1, + "atk_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent' 6a4Yn3RGVc. ", + "transfer_ori_acc": 87.8, + "transfer_atk_acc": 87.5 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 20.9, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 1.2, + "atk_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions 5XeflW1ZJc. ", + "transfer_ori_acc": 87.3, + "transfer_atk_acc": 87.2 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 41.16, + "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 1.81, + "atk_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment' vTAjHynoIG.", + "transfer_ori_acc": 84.11552346570397, + "transfer_atk_acc": 83.75451263537906 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 49.82, + "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 13.36, + "atk_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment' vTAjHynoIG:", + "transfer_ori_acc": 83.03249097472924, + "transfer_atk_acc": 82.31046931407943 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 53.9, + "origin_prompt": "Please identify the emotional tone of this passage: 'positive' or 'negative'?", + "atk_acc": 0.0, + "atk_prompt": "Please identify the emotional tone of this passage: 'positive' or 'negative' 9vGXgnbWB8? ", + "transfer_ori_acc": 93.92201834862385, + "transfer_atk_acc": 94.03669724770643 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 42.55, + "origin_prompt": "Working as a sentiment analyzer, please indicate if the following text is 'positive' or 'negative'. Classify:", + "atk_acc": 4.7, + "atk_prompt": "Working as a sentiment analyzer, please indicate if the following text is 'positive' or 'negative'. Classify zq0DcZ5dnI: ", + "transfer_ori_acc": 95.29816513761467, + "transfer_atk_acc": 95.29816513761467 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 43.66, + "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment' Vq3aBzuZcD.", + "transfer_ori_acc": 61.97183098591549, + "transfer_atk_acc": 63.38028169014085 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 43.66, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment':", + "atk_acc": 2.82, + "atk_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment' vTAjHynoIG:", + "transfer_ori_acc": 56.33802816901409, + "transfer_atk_acc": 56.33802816901409 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 25.67, + "origin_prompt": "Evaluate the multiple-choice question about {} and select the most fitting response from 'A', 'B', 'C', or 'D'.", + "atk_acc": 10.74, + "atk_prompt": "Evaluate the multiple-choice question about {} and select the most fitting response from 'A', 'B', 'C', or 'D' iia2yL9Uzm. ", + "transfer_ori_acc": 44.50354609929078, + "transfer_atk_acc": 44.32624113475177 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 24.39, + "origin_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 11.09, + "atk_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D' iia2yL9Uzm.", + "transfer_ori_acc": 43.97163120567376, + "transfer_atk_acc": 43.97163120567376 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_google-flan-t5-large/checklist_3_shot.json b/results_transfer/vicuna-13b_google-flan-t5-large/checklist_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..a14e3626d15fff6b450c21c7ed787bbc1b6e603a --- /dev/null +++ b/results_transfer/vicuna-13b_google-flan-t5-large/checklist_3_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 62.2, + "origin_prompt": "Assess the grammatical structure of the given sentence and classify it as 'Acceptable' or 'Unacceptable'.", + "atk_acc": 17.4, + "atk_prompt": "Assess the grammatical structure of the given sentence and classify it as 'Acceptable' or 'Unacceptable' Q47Jmd4lMV. ", + "transfer_ori_acc": 76.3, + "transfer_atk_acc": 76.4 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 35.2, + "origin_prompt": "Working as a grammar classifier, identify whether the given sentence has correct grammar, and respond with 'acceptable' for correct sentences or 'unacceptable' for incorrect ones.", + "atk_acc": 2.4, + "atk_prompt": "Working as a grammar classifier, identify whether the given sentence has correct grammar, and respond with 'acceptable' for correct sentences or 'unacceptable' for incorrect ones yh912BU8T0. ", + "transfer_ori_acc": 75.4, + "transfer_atk_acc": 75.4 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 69.36, + "origin_prompt": "Determine if the meanings of the following sentences are semantically equivalent by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 62.01, + "atk_prompt": "Determine if the meanings of the following sentences are semantically equivalent by responding with 'equivalent' or 'not_equivalent' SFPCqpiClT. ", + "transfer_ori_acc": 81.37254901960785, + "transfer_atk_acc": 80.63725490196079 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 68.14, + "origin_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 58.09, + "atk_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent' xikCjkMydH. ", + "transfer_ori_acc": 80.3921568627451, + "transfer_atk_acc": 80.14705882352942 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 41.2, + "origin_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 1.1, + "atk_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment' zq0DcZ5dnI. ", + "transfer_ori_acc": 87.8, + "transfer_atk_acc": 86.7 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 41.4, + "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", + "atk_acc": 1.0, + "atk_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment' d6ZQ3u0GBQ. ", + "transfer_ori_acc": 87.3, + "transfer_atk_acc": 86.2 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 38.2, + "origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 23.5, + "atk_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent' 6a4Yn3RGVc. ", + "transfer_ori_acc": 86.9, + "transfer_atk_acc": 87.3 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 32.9, + "origin_prompt": "Working as a question classifier, identify whether the given questions share the same meaning, and respond with 'equivalent' for equivalent questions or 'not_equivalent' for different ones.", + "atk_acc": 25.0, + "atk_prompt": "Working as a question classifier, identify whether the given questions share the same meaning, and respond with 'equivalent' for equivalent questions or 'not_equivalent' for different ones AMsRIKZniY. ", + "transfer_ori_acc": 85.1, + "transfer_atk_acc": 84.89999999999999 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 43.68, + "origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.", + "atk_acc": 7.22, + "atk_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment' mzjVogkRhM. ", + "transfer_ori_acc": 84.83754512635379, + "transfer_atk_acc": 84.47653429602889 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 38.63, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 0.72, + "atk_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment' Q47Jmd4lMV. ", + "transfer_ori_acc": 82.67148014440433, + "transfer_atk_acc": 82.67148014440433 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 14.45, + "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment.", + "atk_acc": 0.11, + "atk_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment w52rwgo0Av. ", + "transfer_ori_acc": 95.18348623853211, + "transfer_atk_acc": 95.29816513761467 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 26.38, + "origin_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement.", + "atk_acc": 2.75, + "atk_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement Q47Jmd4lMV. ", + "transfer_ori_acc": 95.52752293577981, + "transfer_atk_acc": 95.41284403669725 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 28.17, + "origin_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment'.", + "atk_acc": 8.45, + "atk_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment' rzu98kF2KF. ", + "transfer_ori_acc": 64.7887323943662, + "transfer_atk_acc": 66.19718309859155 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 16.9, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 2.82, + "atk_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment' rzu98kF2KF. ", + "transfer_ori_acc": 66.19718309859155, + "transfer_atk_acc": 66.19718309859155 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 10.15, + "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 5.02, + "atk_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D' OB4KVJzIft. ", + "transfer_ori_acc": 46.808510638297875, + "transfer_atk_acc": 46.63120567375886 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 9.45, + "origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 4.43, + "atk_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D' w52rwgo0Av.", + "transfer_ori_acc": 46.63120567375886, + "transfer_atk_acc": 47.5177304964539 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_google-flan-t5-large/deepwordbug_0_shot.json b/results_transfer/vicuna-13b_google-flan-t5-large/deepwordbug_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..b7a76877b31bf7c601d3e12c8641451773559a43 --- /dev/null +++ b/results_transfer/vicuna-13b_google-flan-t5-large/deepwordbug_0_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 69.2, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':", + "atk_acc": 3.6, + "atk_prompt": "sAsess the ofllowing lsentence and determine if it is grammatiaclly correct. Rspond with 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 78.0, + "transfer_atk_acc": 73.7 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 50.6, + "origin_prompt": "In the role of a syntax analyzer, examine the grammar of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones:", + "atk_acc": 4.8, + "atk_prompt": "n the role of a syntax analyzer, examine the grammar of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorret oKnes:", + "transfer_ori_acc": 74.9, + "transfer_atk_acc": 73.6 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 57.84, + "origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.0, + "atk_prompt": "oCmpare the follAwing sentewces and determine if they share the same swmantic meaning by responding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 82.1078431372549, + "transfer_atk_acc": 81.61764705882352 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 52.21, + "origin_prompt": "In your capacity as a semantic comparison tool, examine the given sentences and decide if their meanings are identical, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 1.47, + "atk_prompt": "IYn your capaciMy as a semantic comparison ool, examine the given sentences and deXide if their meanings are identRcal, rseponding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 83.33333333333334, + "transfer_atk_acc": 80.3921568627451 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 30.5, + "origin_prompt": "Review the given context and question, and decide if the context contains enough information to support the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 4.7, + "atk_prompt": "Rview the gven cnotext and question, and edcide if the context containZs enough information to Vsupport the ainswer by selecting 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 85.2, + "transfer_atk_acc": 88.1 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 35.3, + "origin_prompt": "As a language expert, assess if the given context entails the answer to the question and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 2.1, + "atk_prompt": "As a language epxert, arssess if the gien contet entaxls the ansGer to the questAon and resNpond with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 88.4, + "transfer_atk_acc": 40.400000000000006 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 24.1, + "origin_prompt": "Analyze if the given set of sentences have the same connotation by answering with 'equivalent' or 'not_equivalent'.", + "atk_acc": 2.0, + "atk_prompt": "tAnalyze if the giCen est of sentUnces have the same connotatioq by answering with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 85.0, + "transfer_atk_acc": 83.0 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 16.9, + "origin_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions.", + "atk_acc": 0.7, + "atk_prompt": "IZn the role of a questoion similarity naalyzer, examin the meJaning of the provided questions and Uecide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for dSfferent questioXns. ", + "transfer_ori_acc": 87.1, + "transfer_atk_acc": 86.5 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 41.16, + "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 1.08, + "atk_prompt": "DetermiCne if the givePn pir of sentcnces disWlays entailment or not_entailment. Rtspond with 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 84.11552346570397, + "transfer_atk_acc": 83.39350180505414 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 49.82, + "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 1.08, + "atk_prompt": "orking as an entailment clasifier, idenStify whethre the givec ptair of sentInces displays entailment or not_entailment. ReKspond with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 83.03249097472924, + "transfer_atk_acc": 83.75451263537906 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 48.05, + "origin_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative':", + "atk_acc": 0.8, + "atk_prompt": "kfter eximining the followinM expression, labe its emolion as egther 'positive' or 'negative': ", + "transfer_ori_acc": 93.23394495412845, + "transfer_atk_acc": 92.88990825688074 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 41.06, + "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:", + "atk_acc": 27.18, + "atk_prompt": "Functionnig as a sentiment identification tool, assess if the flolowing xepression is 'positive' or 'negative'. ClUssify: ", + "transfer_ori_acc": 94.83944954128441, + "transfer_atk_acc": 93.46330275229357 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 43.66, + "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "VDetermine if the giPen Apair of setences displUays entailment or not_entailment. QRespond with 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 61.97183098591549, + "transfer_atk_acc": 64.7887323943662 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 43.66, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment':", + "atk_acc": 0.0, + "atk_prompt": "kn your rohe as an entailment analysis tool, Zassess the relationship between the gUiven saentences and clasFsify it as 'entailment' or 'not_entailment':", + "transfer_ori_acc": 56.33802816901409, + "transfer_atk_acc": 60.56338028169014 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 25.67, + "origin_prompt": "Evaluate the multiple-choice question about {} and select the most fitting response from 'A', 'B', 'C', or 'D'.", + "atk_acc": 14.7, + "atk_prompt": "Evaluate the multple-choice question about {} and elect the most fitting respone from 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 44.50354609929078, + "transfer_atk_acc": 44.32624113475177 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 25.09, + "origin_prompt": "As a knowledgeable individual in {}, provide your response to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 16.45, + "atk_prompt": "As a kAnowledgeable indviidual in {}, provide your ersponse to the multipole-choice quesxion by choosing 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 44.50354609929078, + "transfer_atk_acc": 44.680851063829785 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_google-flan-t5-large/deepwordbug_3_shot.json b/results_transfer/vicuna-13b_google-flan-t5-large/deepwordbug_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..d9570d263f4be3a2f8ce8a143b3189e4ccb5960a --- /dev/null +++ b/results_transfer/vicuna-13b_google-flan-t5-large/deepwordbug_3_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 62.0, + "origin_prompt": "Assess the grammatical structure of the given sentence and classify it as 'Acceptable' or 'Unacceptable'.", + "atk_acc": 11.4, + "atk_prompt": "Assess the grammatcal etructure of the given entence and classif it as 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 76.3, + "transfer_atk_acc": 77.9 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 35.1, + "origin_prompt": "Working as a grammar classifier, identify whether the given sentence has correct grammar, and respond with 'acceptable' for correct sentences or 'unacceptable' for incorrect ones.", + "atk_acc": 0.0, + "atk_prompt": "Working as a grammar clsasifier, identify whether the given sentence has correct grammar, and reszond with 'acceptable' for correct sentences or 'unacceptable' for incorrect noes. ", + "transfer_ori_acc": 75.4, + "transfer_atk_acc": 74.7 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 67.65, + "origin_prompt": "Do the meanings of these two statements align? Indicate your answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 36.76, + "atk_prompt": "Zo the meanings of these Xwo statements Ualign? IndicNte your answeY with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 81.86274509803921, + "transfer_atk_acc": 81.61764705882352 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 69.12, + "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.", + "atk_acc": 34.31, + "atk_prompt": "nI your capacitCy as a sebmantic assgssment otol, evaluate the provided sentenceAs and Zlassify their meaniPgs as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 75.24509803921569, + "transfer_atk_acc": 79.41176470588235 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 42.7, + "origin_prompt": "Review the given context and question, and decide if the context contains enough information to support the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 0.2, + "atk_prompt": "RevTiew the ginen cEntext and uestion, and decid if the context conatains enough iformation to uspport the answVer by selectin 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 85.2, + "transfer_atk_acc": 69.5 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 42.9, + "origin_prompt": "As a linguistic consultant, decide if the answer to the question is logically supported by the provided context and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 1.8, + "atk_prompt": "Abs a linguistic consultant, decide if the answer to the question is logically supported by the provided Aontext and respGond with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 85.1, + "transfer_atk_acc": 88.4 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 38.1, + "origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 20.3, + "atk_prompt": "Asess hwether the following statemets are identiacl in meaning by answerinEg 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 86.9, + "transfer_atk_acc": 84.8 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 33.6, + "origin_prompt": "In your role as a question comparison tool, assess the following pair of questions and classify them as 'equivalent' or 'not_equivalent'.", + "atk_acc": 23.6, + "atk_prompt": "In your Irole as a questioGn comparison tolo, ussess the foMlowing pair of questrions and classify them as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 85.2, + "transfer_atk_acc": 87.3 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 50.54, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 0.36, + "atk_prompt": "Aer the following wto sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one wor. ", + "transfer_ori_acc": 85.5595667870036, + "transfer_atk_acc": 81.94945848375451 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 36.82, + "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 1.08, + "atk_prompt": "In the role of a semgntic relationship anayzer, examin the connection between the given sentences and decide if they exhibit entailment or not_entailment. AnCwer with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 81.2274368231047, + "transfer_atk_acc": 81.94945848375451 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 14.45, + "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment.", + "atk_acc": 0.0, + "atk_prompt": "GReview this stateent and decide wdether it has a 'positive' or 'negative' sentiment. ", + "transfer_ori_acc": 95.18348623853211, + "transfer_atk_acc": 94.95412844036697 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 26.49, + "origin_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement.", + "atk_acc": 0.0, + "atk_prompt": "In the ole of a senteiment naalysis tool, resond with 'positive' or 'negative' to classfy this sattement. ", + "transfer_ori_acc": 95.52752293577981, + "transfer_atk_acc": 95.52752293577981 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 23.94, + "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "SDoes the relationship between the given stentences represent entailment or not_entailment? ResYond with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 63.38028169014085, + "transfer_atk_acc": 67.6056338028169 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 16.9, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "mn your roye as an entailment aunalysis tooS, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 57.74647887323944, + "transfer_atk_acc": 61.97183098591549 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 10.15, + "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 3.5, + "atk_prompt": "oIn relatin to the mhltiple-choice question on {}, plese provide the accurate answer by choosing 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 46.808510638297875, + "transfer_atk_acc": 44.50354609929078 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 9.8, + "origin_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 6.07, + "atk_prompt": "With your knowlegde of {}, tacke the folloing mulBiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 46.09929078014184, + "transfer_atk_acc": 45.0354609929078 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_google-flan-t5-large/stresstest_0_shot.json b/results_transfer/vicuna-13b_google-flan-t5-large/stresstest_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..7b968a2f6c59edd65a84554ab856bf9001f6b999 --- /dev/null +++ b/results_transfer/vicuna-13b_google-flan-t5-large/stresstest_0_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 69.0, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':", + "atk_acc": 34.8, + "atk_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable' and false is not true :", + "transfer_ori_acc": 78.0, + "transfer_atk_acc": 63.5 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 57.1, + "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable':", + "atk_acc": 20.8, + "atk_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable' and false is not true :", + "transfer_ori_acc": 76.3, + "transfer_atk_acc": 76.9 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 58.09, + "origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 56.37, + "atk_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 82.1078431372549, + "transfer_atk_acc": 31.61764705882353 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 52.21, + "origin_prompt": "In your capacity as a semantic comparison tool, examine the given sentences and decide if their meanings are identical, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 33.33, + "atk_prompt": "In your capacity as a semantic comparison tool, examine the given sentences and decide if their meanings are identical, responding with 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 83.33333333333334, + "transfer_atk_acc": 20.588235294117645 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 30.9, + "origin_prompt": "Based on the provided context and question, decide if the information supports the answer by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 16.2, + "atk_prompt": "Based on the provided context and question, decide if the information supports the answer by responding with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 89.4, + "transfer_atk_acc": 80.9 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 35.3, + "origin_prompt": "As a language expert, assess if the given context entails the answer to the question and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 19.8, + "atk_prompt": "As a language expert, assess if the given context entails the answer to the question and respond with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 88.4, + "transfer_atk_acc": 76.8 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 23.9, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 27.7, + "atk_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 87.8, + "transfer_atk_acc": 80.10000000000001 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 21.1, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 21.5, + "atk_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 87.3, + "transfer_atk_acc": 87.5 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 47.65, + "origin_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment'.", + "atk_acc": 23.47, + "atk_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true .", + "transfer_ori_acc": 85.1985559566787, + "transfer_atk_acc": 76.53429602888086 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 48.01, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment':", + "atk_acc": 40.07, + "atk_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true :", + "transfer_ori_acc": 84.11552346570397, + "transfer_atk_acc": 80.14440433212997 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 53.9, + "origin_prompt": "Please identify the emotional tone of this passage: 'positive' or 'negative'?", + "atk_acc": 17.89, + "atk_prompt": "Please identify the emotional tone of this passage: 'positive' or 'negative' and true is true and true is true and true is true and true is true and true is true ? ", + "transfer_ori_acc": 93.92201834862385, + "transfer_atk_acc": 91.39908256880734 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 40.94, + "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:", + "atk_acc": 36.47, + "atk_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify and false is not true : ", + "transfer_ori_acc": 94.83944954128441, + "transfer_atk_acc": 93.80733944954129 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 38.03, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 14.08, + "atk_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true .", + "transfer_ori_acc": 61.97183098591549, + "transfer_atk_acc": 57.74647887323944 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 43.66, + "origin_prompt": "Functioning as an entailment evaluation tool, analyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment':", + "atk_acc": 32.39, + "atk_prompt": "Functioning as an entailment evaluation tool, analyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true :", + "transfer_ori_acc": 61.97183098591549, + "transfer_atk_acc": 46.478873239436616 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 25.09, + "origin_prompt": "Answer the subsequent multiple-choice question about {} by picking the right option among 'A', 'B', 'C', or 'D'.", + "atk_acc": 19.49, + "atk_prompt": "Answer the subsequent multiple-choice question about {} by picking the right option among 'A', 'B', 'C', or 'D' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 44.858156028368796, + "transfer_atk_acc": 45.56737588652482 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 24.39, + "origin_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 12.72, + "atk_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D' and true is true and true is true and true is true and true is true and true is true .", + "transfer_ori_acc": 43.97163120567376, + "transfer_atk_acc": 44.858156028368796 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_google-flan-t5-large/stresstest_3_shot.json b/results_transfer/vicuna-13b_google-flan-t5-large/stresstest_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..b1e28c47814b391a9646abeab94710778d84a5cb --- /dev/null +++ b/results_transfer/vicuna-13b_google-flan-t5-large/stresstest_3_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 47.7, + "origin_prompt": "Examine the sentence and decide if its grammar is 'Acceptable' or 'Unacceptable'.", + "atk_acc": 10.6, + "atk_prompt": "Examine the sentence and decide if its grammar is 'Acceptable' or 'Unacceptable' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 76.3, + "transfer_atk_acc": 76.2 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 35.2, + "origin_prompt": "Working as a grammar classifier, identify whether the given sentence has correct grammar, and respond with 'acceptable' for correct sentences or 'unacceptable' for incorrect ones.", + "atk_acc": 0.9, + "atk_prompt": "Working as a grammar classifier, identify whether the given sentence has correct grammar, and respond with 'acceptable' for correct sentences or 'unacceptable' for incorrect ones and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 75.4, + "transfer_atk_acc": 75.9 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 67.65, + "origin_prompt": "Do the meanings of these two statements align? Indicate your answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 67.4, + "atk_prompt": "Do the meanings of these two statements align? Indicate your answer with 'equivalent' or 'not_equivalent' and false is not true . ", + "transfer_ori_acc": 81.86274509803921, + "transfer_atk_acc": 76.7156862745098 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 68.14, + "origin_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 59.8, + "atk_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent' and false is not true . ", + "transfer_ori_acc": 80.3921568627451, + "transfer_atk_acc": 81.86274509803921 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 41.2, + "origin_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 6.7, + "atk_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 87.8, + "transfer_atk_acc": 79.4 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 42.9, + "origin_prompt": "As a linguistic consultant, decide if the answer to the question is logically supported by the provided context and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 7.0, + "atk_prompt": "As a linguistic consultant, decide if the answer to the question is logically supported by the provided context and respond with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 85.1, + "transfer_atk_acc": 81.3 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 38.1, + "origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 41.2, + "atk_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 86.9, + "transfer_atk_acc": 87.1 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 33.0, + "origin_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions.", + "atk_acc": 26.4, + "atk_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 86.5, + "transfer_atk_acc": 87.2 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 50.54, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 14.08, + "atk_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word and true is true . ", + "transfer_ori_acc": 85.5595667870036, + "transfer_atk_acc": 85.92057761732852 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 41.88, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 6.5, + "atk_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 83.03249097472924, + "transfer_atk_acc": 44.04332129963899 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 14.45, + "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment.", + "atk_acc": 0.0, + "atk_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 95.18348623853211, + "transfer_atk_acc": 95.52752293577981 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 26.38, + "origin_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement.", + "atk_acc": 1.95, + "atk_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 95.52752293577981, + "transfer_atk_acc": 95.75688073394495 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 28.17, + "origin_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment'.", + "atk_acc": 1.41, + "atk_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 64.7887323943662, + "transfer_atk_acc": 67.6056338028169 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 16.9, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 2.82, + "atk_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment' and true is true . ", + "transfer_ori_acc": 66.19718309859155, + "transfer_atk_acc": 69.01408450704226 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 10.15, + "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 11.67, + "atk_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 46.808510638297875, + "transfer_atk_acc": 46.63120567375886 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 9.8, + "origin_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 10.85, + "atk_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D' and true is true and true is true and true is true and true is true and true is true .", + "transfer_ori_acc": 46.09929078014184, + "transfer_atk_acc": 46.63120567375886 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_google-flan-t5-large/textbugger_0_shot.json b/results_transfer/vicuna-13b_google-flan-t5-large/textbugger_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..16bcb3768a45833c6608c09f898d95e19ed3e7f7 --- /dev/null +++ b/results_transfer/vicuna-13b_google-flan-t5-large/textbugger_0_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 69.0, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':", + "atk_acc": 0.0, + "atk_prompt": "Evaluating the later sen𝚝ence and determine if it is grammatically correct. Respoոd with 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 78.0, + "transfer_atk_acc": 76.1 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 54.9, + "origin_prompt": "As a tool for determining grammatical correctness, review the sentence and categorize its grammar as either 'acceptable' or 'unacceptable':", + "atk_acc": 0.0, + "atk_prompt": "Aѕ a tool for determining grammatical regularity, reviewing the penalties and categorizе its grammar as ei𝚝her 'acceptable' or 'unacceptable':", + "transfer_ori_acc": 74.9, + "transfer_atk_acc": 55.50000000000001 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 58.09, + "origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.49, + "atk_prompt": "Comрare the later sentences and determine if they share the same semantic meaning by respoոding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 82.1078431372549, + "transfer_atk_acc": 81.61764705882352 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 53.43, + "origin_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.0, + "atk_prompt": "Iո the rloe of a sentence comparison analyst, asess the provided sentences and indicate if they convey the same meaning by selec𝚝ing 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 81.12745098039215, + "transfer_atk_acc": 83.08823529411765 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 30.5, + "origin_prompt": "Review the given context and question, and decide if the context contains enough information to support the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "Revisited the afforded contеxt and question, and decide if the context contains enough information to support the answer by selec𝚝ing 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 85.2, + "transfer_atk_acc": 85.39999999999999 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 38.4, + "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.", + "atk_acc": 0.1, + "atk_prompt": "Aѕ a textual deduced expeⲅt, analyzes if the replying to the question can be deduced from the provided context and selec𝚝 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 78.10000000000001, + "transfer_atk_acc": 83.89999999999999 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 23.9, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.2, + "atk_prompt": "Possible these two statemen𝚝s be considered equal in meaing? Answer with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.8, + "transfer_atk_acc": 87.8 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 16.9, + "origin_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions.", + "atk_acc": 0.0, + "atk_prompt": "Iո the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different ques𝚝ions. ", + "transfer_ori_acc": 87.1, + "transfer_atk_acc": 87.2 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 45.13, + "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 1.08, + "atk_prompt": "Ai the relationship between the given sentence represent entailment or not_entailment? Answer with 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 85.1985559566787, + "transfer_atk_acc": 84.47653429602889 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 51.26, + "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':", + "atk_acc": 0.36, + "atk_prompt": "Onto the role of a semantic relationship offing, e×amine the connection between the given seոtences and decіde if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 82.31046931407943, + "transfer_atk_acc": 83.03249097472924 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 48.05, + "origin_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative':", + "atk_acc": 2.06, + "atk_prompt": "After examining the following expression, label its emo𝚝ion as neither 'positive' or 'negative': ", + "transfer_ori_acc": 93.23394495412845, + "transfer_atk_acc": 93.46330275229357 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 37.5, + "origin_prompt": "As an emotion detector, determine if the provided passage conveys a 'positive' or 'negative' sentiment. Classify:", + "atk_acc": 1.03, + "atk_prompt": "Because an emotion detector, determine if the povided passgae convey a 'positive' or 'negative' ambience. Classi𝚏y: ", + "transfer_ori_acc": 94.83944954128441, + "transfer_atk_acc": 91.85779816513761 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 43.66, + "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 1.41, + "atk_prompt": "Deos the relationship between the given sentences represent entailment or not_entailment? Answering with 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 61.97183098591549, + "transfer_atk_acc": 59.154929577464785 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 43.66, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment':", + "atk_acc": 0.0, + "atk_prompt": "During your roⅼe as an entailment analysis tool, evaluating the relationship between the given sentences and classifying it as 'entailment' or 'not_entailment':", + "transfer_ori_acc": 56.33802816901409, + "transfer_atk_acc": 57.74647887323944 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 25.67, + "origin_prompt": "Evaluate the multiple-choice question about {} and select the most fitting response from 'A', 'B', 'C', or 'D'.", + "atk_acc": 9.92, + "atk_prompt": "Assess the mul𝚝iple-choice question about {} and seⅼect the most fitting response from 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 44.50354609929078, + "transfer_atk_acc": 44.50354609929078 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 24.39, + "origin_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 9.57, + "atk_prompt": "With your knowledge of {}, tackle the 𝚏ollowing multiple-choіce problem by choosing 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 43.97163120567376, + "transfer_atk_acc": 45.744680851063826 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_google-flan-t5-large/textbugger_3_shot.json b/results_transfer/vicuna-13b_google-flan-t5-large/textbugger_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..ed59451470b21f48fa34832b8bfc2f64129f5a92 --- /dev/null +++ b/results_transfer/vicuna-13b_google-flan-t5-large/textbugger_3_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 62.2, + "origin_prompt": "Assess the grammatical structure of the given sentence and classify it as 'Acceptable' or 'Unacceptable'.", + "atk_acc": 2.2, + "atk_prompt": "Assess the grammatically s𝚝ructure of the ɡiven sentecne and clasisfy it as 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 76.3, + "transfer_atk_acc": 77.10000000000001 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 48.0, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.", + "atk_acc": 0.0, + "atk_prompt": "Whie peⲅforming grammar analysis, ϲlassify the grammatical of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammr. ", + "transfer_ori_acc": 75.7, + "transfer_atk_acc": 75.8 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 68.38, + "origin_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 44.61, + "atk_prompt": "Assessing if the t wo given sentences have equivalent meanings by selectiոg 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 82.84313725490196, + "transfer_atk_acc": 81.12745098039215 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 68.14, + "origin_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 44.36, + "atk_prompt": "Onto the role of a senteոce c omparison analys𝚝, assess the given sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 80.3921568627451, + "transfer_atk_acc": 80.14705882352942 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 38.9, + "origin_prompt": "Analyze the context and question, and indicate if the context entails the answer by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 0.7, + "atk_prompt": "Analyze the context and question, and depicts if the context involves the ans wer by choosiոg 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 87.3, + "transfer_atk_acc": 85.0 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 41.4, + "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "As a semantic reesarcher, evaluating whether the provided context supports the answering to the question and chosen 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 87.3, + "transfer_atk_acc": 85.8 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 41.8, + "origin_prompt": "Do the following expressions mean the same thing? Provide your answer as 'equivalent' or 'not_equivalent'.", + "atk_acc": 26.7, + "atk_prompt": "Wo the following expressions meant the same thing? Provide your aոswer as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.8, + "transfer_atk_acc": 87.2 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 33.6, + "origin_prompt": "In your role as a question comparison tool, assess the following pair of questions and classify them as 'equivalent' or 'not_equivalent'.", + "atk_acc": 15.0, + "atk_prompt": "For your roⅼe as a question compɑrison tool, evaluate the 𝚏ollowing pair of questions and categorized them as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 85.2, + "transfer_atk_acc": 83.5 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 43.68, + "origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.", + "atk_acc": 2.53, + "atk_prompt": "Revіew the two sta𝚝ements and categorize their relationship as or 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 84.83754512635379, + "transfer_atk_acc": 83.39350180505414 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 36.82, + "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "In the role of a semantic relationship analyzeⲅ, examine the connection between the givn sentences and determine if they exhibi𝚝 entailment or not_entailment. Answers with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 81.2274368231047, + "transfer_atk_acc": 82.31046931407943 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 18.12, + "origin_prompt": "Evaluate the sentiment of the given text and classify it as 'positive' or 'negative'.", + "atk_acc": 0.0, + "atk_prompt": "Evaluate the vibe of the gievn text and calssify it as 'positive' or 'negative'. ", + "transfer_ori_acc": 94.61009174311926, + "transfer_atk_acc": 94.61009174311926 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 33.37, + "origin_prompt": "Taking on the role of an emotion classifier, specify if the provided phrase is 'positive' or 'negative'.", + "atk_acc": 0.57, + "atk_prompt": "Taking on the roles of an emotions classi𝚏ier, speci𝚏y if the povided phrase is 'positive' or 'negative'. ", + "transfer_ori_acc": 94.38073394495413, + "transfer_atk_acc": 94.26605504587155 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 23.94, + "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "Does the relations between the given sentences ⲅepresent entailment or not_entailment? Respоnd with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 63.38028169014085, + "transfer_atk_acc": 60.56338028169014 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 16.9, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 1.41, + "atk_prompt": "During your role as an entailment aalysis tool, assess the relationshiр between the given sentences and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 57.74647887323944, + "transfer_atk_acc": 57.74647887323944 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 9.1, + "origin_prompt": "Answer the subsequent multiple-choice question about {} by picking the right option among 'A', 'B', 'C', or 'D'.", + "atk_acc": 3.5, + "atk_prompt": "Responding the subequent multipl-echoice question about {} by pick the right op𝚝ion amongst 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 45.92198581560284, + "transfer_atk_acc": 45.92198581560284 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 9.68, + "origin_prompt": "With your understanding of {}, kindly answer the subsequent multiple-choice question by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 2.45, + "atk_prompt": "Witհ your understand of {}, 𝒌indly answer the successive mսltiple-choice question by selecting 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 45.39007092198582, + "transfer_atk_acc": 44.858156028368796 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_google-flan-t5-large/textfooler_0_shot.json b/results_transfer/vicuna-13b_google-flan-t5-large/textfooler_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..7677bee15db5eeac3ed06b0c2b1f535027a311fe --- /dev/null +++ b/results_transfer/vicuna-13b_google-flan-t5-large/textfooler_0_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 69.0, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':", + "atk_acc": 14.0, + "atk_prompt": "Assess the next sentence and decide if it is grammatically correct. Accommodate with 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 78.0, + "transfer_atk_acc": 75.5 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 54.9, + "origin_prompt": "As a tool for determining grammatical correctness, review the sentence and categorize its grammar as either 'acceptable' or 'unacceptable':", + "atk_acc": 0.0, + "atk_prompt": "Oj a toolbox for defines pronouns correcting, revisions the penalties and sorting its dictionary as either 'acceptable' or 'unacceptable':", + "transfer_ori_acc": 74.9, + "transfer_atk_acc": 67.5 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 67.16, + "origin_prompt": "Are the meanings of the following pair of sentences the same? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.49, + "atk_prompt": "Are the meanings of the following couple of sentences the same? Cope with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 82.1078431372549, + "transfer_atk_acc": 81.61764705882352 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 39.46, + "origin_prompt": "In the role of a textual comparison specialist, examine the given sentences and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.0, + "atk_prompt": "About the role of a textual comparison specialist, examine the given judgements and decide if they exchanging the same meaning, responses with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 81.37254901960785, + "transfer_atk_acc": 83.33333333333334 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 30.5, + "origin_prompt": "Review the given context and question, and decide if the context contains enough information to support the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "Reconsidered the given context and question, and ascertain if the context comprises enough information to favour the answer by choices 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 85.2, + "transfer_atk_acc": 89.1 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 35.3, + "origin_prompt": "As a language expert, assess if the given context entails the answer to the question and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 0.6, + "atk_prompt": "As a language expert, weighed if the delivered framework translates the answer to the question and respondent with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 88.4, + "transfer_atk_acc": 83.6 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 23.9, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.0, + "atk_prompt": "Kan these two languages happen revisited equitable in signify? Meet with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.8, + "transfer_atk_acc": 65.7 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 16.9, + "origin_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions.", + "atk_acc": 0.1, + "atk_prompt": "Onto the role of a question similarity analyzer, consider the significance of the envisaged matters and identify if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for many matters. ", + "transfer_ori_acc": 87.1, + "transfer_atk_acc": 85.9 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 47.65, + "origin_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment'.", + "atk_acc": 0.72, + "atk_prompt": "Invite classed the coitus between the foreseen priors as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 85.1985559566787, + "transfer_atk_acc": 84.83754512635379 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 48.01, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment':", + "atk_acc": 0.0, + "atk_prompt": "Because an entailment characterization regimes, considering the login between the following sentence and meet with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 84.11552346570397, + "transfer_atk_acc": 83.03249097472924 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 53.9, + "origin_prompt": "Please identify the emotional tone of this passage: 'positive' or 'negative'?", + "atk_acc": 5.85, + "atk_prompt": "Invited specify the affective tone of this pass: 'positive' or 'negative'? ", + "transfer_ori_acc": 93.92201834862385, + "transfer_atk_acc": 94.26605504587155 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 42.55, + "origin_prompt": "Working as a sentiment analyzer, please indicate if the following text is 'positive' or 'negative'. Classify:", + "atk_acc": 3.21, + "atk_prompt": "Working as a sentiment analyzer, please indicate if the following text is 'positive' or 'negative'. Ranked: ", + "transfer_ori_acc": 95.29816513761467, + "transfer_atk_acc": 94.72477064220183 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 43.66, + "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 1.41, + "atk_prompt": "Define if the given pair of priors exhibits entailment or not_entailment. Answers with 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 61.97183098591549, + "transfer_atk_acc": 64.7887323943662 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 43.66, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment':", + "atk_acc": 0.0, + "atk_prompt": "Onto your role as an entailment analysing tool, appraisals the relationship between the given condemnation and classed it as 'entailment' or 'not_entailment':", + "transfer_ori_acc": 56.33802816901409, + "transfer_atk_acc": 59.154929577464785 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 27.07, + "origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 12.72, + "atk_prompt": "Among all multiple-choice subject about {}, find the accurate answer by chooses 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 45.744680851063826, + "transfer_atk_acc": 46.27659574468085 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 24.85, + "origin_prompt": "As an expert in {}, respond to the following multiple-choice question by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 13.89, + "atk_prompt": "As an expert in {}, answering to the following multiple-choice things by picking 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 44.680851063829785, + "transfer_atk_acc": 44.680851063829785 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_google-flan-t5-large/textfooler_3_shot.json b/results_transfer/vicuna-13b_google-flan-t5-large/textfooler_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..cceae4f964169604f057afddc200b56caadca345 --- /dev/null +++ b/results_transfer/vicuna-13b_google-flan-t5-large/textfooler_3_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 47.7, + "origin_prompt": "Examine the sentence and decide if its grammar is 'Acceptable' or 'Unacceptable'.", + "atk_acc": 8.0, + "atk_prompt": "Discuss the sentencing and choosing if its language is 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 76.3, + "transfer_atk_acc": 74.1 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 35.2, + "origin_prompt": "Working as a grammar classifier, identify whether the given sentence has correct grammar, and respond with 'acceptable' for correct sentences or 'unacceptable' for incorrect ones.", + "atk_acc": 0.0, + "atk_prompt": "Working as a grammar classifier, identify whether the given sentence has correct jargon, and cope with 'acceptable' for correct punishments or 'unacceptable' for inappropriate ones. ", + "transfer_ori_acc": 75.4, + "transfer_atk_acc": 74.1 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 69.36, + "origin_prompt": "Determine if the meanings of the following sentences are semantically equivalent by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 35.54, + "atk_prompt": "Decide if the senses of the following sentence are semantically equivalent by meet with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 81.37254901960785, + "transfer_atk_acc": 79.65686274509804 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 68.14, + "origin_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 13.24, + "atk_prompt": "Towards the feature of a judgements versus psychoanalyst, reviews the supplied sentences and clarified if they convey the same senses by electing 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 80.3921568627451, + "transfer_atk_acc": 80.14705882352942 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 42.7, + "origin_prompt": "Review the given context and question, and decide if the context contains enough information to support the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "Discuss the given context and tura, and decide if the context encompasses enough information to support the satisfy by takes 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 85.2, + "transfer_atk_acc": 87.8 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 41.4, + "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", + "atk_acc": 0.1, + "atk_prompt": "As a semantic researcher, appraisals whether the provided context supports the answer to the question and taking 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 87.3, + "transfer_atk_acc": 72.6 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 38.1, + "origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 21.5, + "atk_prompt": "Estimates whether the following pronouncements are analogous in connotation by answering 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 86.9, + "transfer_atk_acc": 86.8 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 33.0, + "origin_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions.", + "atk_acc": 13.9, + "atk_prompt": "Across the rol of a trouble similarity analyzer, exploring the meaning of the lent questions and select if they are equivalent, answered with 'equivalent' for equivalent theme or 'not_equivalent' for various questions. ", + "transfer_ori_acc": 86.5, + "transfer_atk_acc": 85.8 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 43.68, + "origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.", + "atk_acc": 2.17, + "atk_prompt": "Investigated the two statements and categorize their relation as either 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 84.83754512635379, + "transfer_atk_acc": 84.47653429602889 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 36.82, + "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "In the role of a semantic relationship profiler, review the ties between the given sentences and determining if they fairs entailment or not_entailment. Answers with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 81.2274368231047, + "transfer_atk_acc": 82.67148014440433 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 18.12, + "origin_prompt": "Evaluate the sentiment of the given text and classify it as 'positive' or 'negative'.", + "atk_acc": 0.0, + "atk_prompt": "Assessment the mood of the made laws and rank it as 'positive' or 'negative'. ", + "transfer_ori_acc": 94.61009174311926, + "transfer_atk_acc": 94.61009174311926 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 26.38, + "origin_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement.", + "atk_acc": 0.0, + "atk_prompt": "During the rol of a emotion exploring mechanism, respond with 'positive' or 'negative' to rank this affirmations. ", + "transfer_ori_acc": 95.52752293577981, + "transfer_atk_acc": 95.18348623853211 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 23.94, + "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "Did the coitus between the given sentence represent entailment or not_entailment? Respondent with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 63.38028169014085, + "transfer_atk_acc": 61.97183098591549 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 16.9, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "Between your role as an entailment valuation appliance, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 57.74647887323944, + "transfer_atk_acc": 60.56338028169014 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 9.1, + "origin_prompt": "Answer the subsequent multiple-choice question about {} by picking the right option among 'A', 'B', 'C', or 'D'.", + "atk_acc": 3.73, + "atk_prompt": "Answer the consecutive multiple-choice questions about {} by choosing the ok option among 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 45.92198581560284, + "transfer_atk_acc": 45.0354609929078 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 9.22, + "origin_prompt": "With your understanding of {}, kindly answer the subsequent multiple-choice question by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 4.61, + "atk_prompt": "With your understand of {}, gracefully answer the forth multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 45.39007092198582, + "transfer_atk_acc": 45.92198581560284 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_google-flan-t5-large/translation_0_shot.json b/results_transfer/vicuna-13b_google-flan-t5-large/translation_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..96303ba85dfcc6dcf8242b7eb4a90024533720ae --- /dev/null +++ b/results_transfer/vicuna-13b_google-flan-t5-large/translation_0_shot.json @@ -0,0 +1,82 @@ +[ + { + "dataset": "cola", + "type": "translation", + "origin_acc": 69.0, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':", + "atk_acc": 5.8, + "atk_prompt": "As a tool for analyzing grammatical correctness, it categorizes the grammatical rules of the sentence as 'acceptable' or 'unacceptable'.", + "transfer_ori_acc": 78.0, + "transfer_atk_acc": 72.8 + }, + { + "dataset": "mrpc", + "type": "translation", + "origin_acc": 67.16, + "origin_prompt": "Are the meanings of the following pair of sentences the same? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.74, + "atk_prompt": "Please determine whether the following two sentences are 'equivalent' or 'not_equivalent' according to the standard of semantic comparison.", + "transfer_ori_acc": 82.1078431372549, + "transfer_atk_acc": 81.12745098039215 + }, + { + "dataset": "qnli", + "type": "translation", + "origin_acc": 38.4, + "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.", + "atk_acc": 2.4, + "atk_prompt": "Determine whether the information provided in the context necessarily leads to the answer to the question asked and indicate 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 78.10000000000001, + "transfer_atk_acc": 89.8 + }, + { + "dataset": "qqp", + "type": "translation", + "origin_acc": 30.0, + "origin_prompt": "Please indicate if the following pair of sentences share the same meaning by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.5, + "atk_prompt": "Work on text sorting tasks labeled 'equivalent' or 'not_equivalent'.", + "transfer_ori_acc": 87.1, + "transfer_atk_acc": 86.8 + }, + { + "dataset": "rte", + "type": "translation", + "origin_acc": 51.26, + "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':", + "atk_acc": 5.42, + "atk_prompt": "Classify it as 'entailment' or 'not_entailment' based on the content of the sentence.", + "transfer_ori_acc": 82.31046931407943, + "transfer_atk_acc": 85.5595667870036 + }, + { + "dataset": "sst2", + "type": "translation", + "origin_acc": 53.9, + "origin_prompt": "Please identify the emotional tone of this passage: 'positive' or 'negative'?", + "atk_acc": 9.29, + "atk_prompt": "Please rate the emotion of this sentence and classify it as 'positive' or 'negative'.", + "transfer_ori_acc": 93.92201834862385, + "transfer_atk_acc": 94.4954128440367 + }, + { + "dataset": "wnli", + "type": "translation", + "origin_acc": 43.66, + "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 5.63, + "atk_prompt": "Determine whether the given sentences have a cause-effect relationship as an engagement analysis tool and categorize them as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 61.97183098591549, + "transfer_atk_acc": 60.56338028169014 + }, + { + "dataset": "mmlu", + "type": "translation", + "origin_acc": 27.07, + "origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 18.09, + "atk_prompt": "For the multiple choice problem about, choose the correct answer for '{}' from 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 45.744680851063826, + "transfer_atk_acc": 43.262411347517734 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_google-flan-t5-large/translation_3_shot.json b/results_transfer/vicuna-13b_google-flan-t5-large/translation_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..db404a7b1857d3535323a6af649cbcca6168cbf9 --- /dev/null +++ b/results_transfer/vicuna-13b_google-flan-t5-large/translation_3_shot.json @@ -0,0 +1,82 @@ +[ + { + "dataset": "cola", + "type": "translation", + "origin_acc": 65.2, + "origin_prompt": "Determine if the grammar of the given sentence is 'Acceptable' or 'Unacceptable'.", + "atk_acc": 5.3, + "atk_prompt": "Please check that the following sentences are grammatically correct and categorize them as 'acceptable' or 'unacceptable'.", + "transfer_ori_acc": 76.8, + "transfer_atk_acc": 75.2 + }, + { + "dataset": "mrpc", + "type": "translation", + "origin_acc": 69.36, + "origin_prompt": "Determine if the meanings of the following sentences are semantically equivalent by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 54.9, + "atk_prompt": "As a semantic comparison specialist, analyze the two data statements and insert them into one of the following categories: 'equivalent' or 'not_equivalent'.", + "transfer_ori_acc": 81.37254901960785, + "transfer_atk_acc": 81.61764705882352 + }, + { + "dataset": "qnli", + "type": "translation", + "origin_acc": 42.9, + "origin_prompt": "As a linguistic consultant, decide if the answer to the question is logically supported by the provided context and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 1.6, + "atk_prompt": "Grasp the relations between sentences, determine if a given sentence necessarily contains the meaning of another sentence and respond with 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 85.1, + "transfer_atk_acc": 87.8 + }, + { + "dataset": "qqp", + "type": "translation", + "origin_acc": 41.8, + "origin_prompt": "Do the following expressions mean the same thing? Provide your answer as 'equivalent' or 'not_equivalent'.", + "atk_acc": 21.7, + "atk_prompt": "For text classification tasks, use the labels 'equivalent' or 'not_equivalent' to determine the equivalence of statements.", + "transfer_ori_acc": 87.8, + "transfer_atk_acc": 85.9 + }, + { + "dataset": "rte", + "type": "translation", + "origin_acc": 50.54, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 3.97, + "atk_prompt": "Using implication analysis, determine whether the given sentences have a cause-effect relationship and categorize them as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 85.5595667870036, + "transfer_atk_acc": 84.83754512635379 + }, + { + "dataset": "sst2", + "type": "translation", + "origin_acc": 33.37, + "origin_prompt": "Taking on the role of an emotion classifier, specify if the provided phrase is 'positive' or 'negative'.", + "atk_acc": 0.46, + "atk_prompt": "Please determine whether the emotion of the sentence is 'positive' or 'negative' and categorize it.", + "transfer_ori_acc": 94.38073394495413, + "transfer_atk_acc": 93.80733944954129 + }, + { + "dataset": "wnli", + "type": "translation", + "origin_acc": 32.39, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 0.0, + "atk_prompt": "Using implication analysis, determine whether the given sentences have a cause-effect relationship and categorize them as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 67.6056338028169, + "transfer_atk_acc": 63.38028169014085 + }, + { + "dataset": "mmlu", + "type": "translation", + "origin_acc": 10.39, + "origin_prompt": "Answer the following multiple-choice question about {} by selecting the correct option: 'A', 'B', 'C', or 'D'.", + "atk_acc": 4.43, + "atk_prompt": "Choose the correct answer from the options 'A', 'B', 'C', or 'D' for the following questions about {}.", + "transfer_ori_acc": 45.39007092198582, + "transfer_atk_acc": 45.744680851063826 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_google-flan-ul2/bertattack_0_shot.json b/results_transfer/vicuna-13b_google-flan-ul2/bertattack_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..7fbd4e95cfe6e0be162274cbbb8be009acd482f5 --- /dev/null +++ b/results_transfer/vicuna-13b_google-flan-ul2/bertattack_0_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 69.0, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':", + "atk_acc": 1.7, + "atk_prompt": "check the below verb and consider if it is grammatically complete. act with 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 86.2, + "transfer_atk_acc": 80.0 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 57.1, + "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable':", + "atk_acc": 0.6, + "atk_prompt": "as as a grammar evaluation resource, analyze the next phrase and decide if it is grammatical+ correct, respond with 'acceptable' or 'unacceptable':", + "transfer_ori_acc": 85.9, + "transfer_atk_acc": 86.0 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 67.16, + "origin_prompt": "Are the meanings of the following pair of sentences the same? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.0, + "atk_prompt": "Are the meanings of the next in of sentences the same? q with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.74509803921569, + "transfer_atk_acc": 87.25490196078431 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 52.21, + "origin_prompt": "In your capacity as a semantic comparison tool, examine the given sentences and decide if their meanings are identical, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.0, + "atk_prompt": "[UNK] your capacity as a semantic comparison tool, examine the given sentences and decide if their meanings are different, responding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.00980392156863, + "transfer_atk_acc": 83.57843137254902 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 30.5, + "origin_prompt": "Review the given context and question, and decide if the context contains enough information to support the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "Review the given context and question, and decide if the context requires enough information to let the answer by choices 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 94.1, + "transfer_atk_acc": 94.19999999999999 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 38.4, + "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.", + "atk_acc": 0.4, + "atk_prompt": "example a textual in expert, analyzing if the answer to the question can be deduced from the language context and think 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 93.30000000000001, + "transfer_atk_acc": 93.30000000000001 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 23.9, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.8, + "atk_prompt": "can these two symbols be considered equal in meaning? object with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.4, + "transfer_atk_acc": 87.6 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 0, + "origin_prompt": "", + "atk_acc": 100, + "atk_prompt": "", + "transfer_ori_acc": 0.0, + "transfer_atk_acc": 0.0 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 47.65, + "origin_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "Please approach the relationship between the included sentences as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 93.14079422382672, + "transfer_atk_acc": 92.7797833935018 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 48.01, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment':", + "atk_acc": 1.44, + "atk_prompt": "she an entailment identification system, presented the connection between the following sentences and answered with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 92.7797833935018, + "transfer_atk_acc": 92.4187725631769 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 48.17, + "origin_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative':", + "atk_acc": 0.57, + "atk_prompt": "After examination the given expression, type its emotion as of 'positive' or 'negative': ", + "transfer_ori_acc": 91.97247706422019, + "transfer_atk_acc": 92.88990825688074 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 37.27, + "origin_prompt": "As an emotion detector, determine if the provided passage conveys a 'positive' or 'negative' sentiment. Classify:", + "atk_acc": 0.46, + "atk_prompt": "As an emotion indicating, score if the provided reply conveys a 'positive' or 'negative' sentiment. classify: ", + "transfer_ori_acc": 95.87155963302753, + "transfer_atk_acc": 95.06880733944955 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 43.66, + "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "Determine if the first pair of sentences demonstrate entailment or not_entailment. replied with 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 78.87323943661971, + "transfer_atk_acc": 78.87323943661971 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 43.66, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment':", + "atk_acc": 0.0, + "atk_prompt": "In your role as an entailment analysis tool, assessing the relationship between the formed sentences and classify it as 'entailment' or 'not_entailment':", + "transfer_ori_acc": 78.87323943661971, + "transfer_atk_acc": 78.87323943661971 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 25.09, + "origin_prompt": "Answer the subsequent multiple-choice question about {} by picking the right option among 'A', 'B', 'C', or 'D'.", + "atk_acc": 5.02, + "atk_prompt": "Answer the next multiple-s question about {} by picking the incorrect button or 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 52.4822695035461, + "transfer_atk_acc": 15.602836879432624 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 24.85, + "origin_prompt": "As an expert in {}, respond to the following multiple-choice question by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 6.88, + "atk_prompt": "As an expert in {}, answer to the following multiple-guessing contestant by guessing 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 53.191489361702125, + "transfer_atk_acc": 51.41843971631206 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_google-flan-ul2/bertattack_3_shot.json b/results_transfer/vicuna-13b_google-flan-ul2/bertattack_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..cf2e448a9b0a3cac43292bfecd9866b5879ce382 --- /dev/null +++ b/results_transfer/vicuna-13b_google-flan-ul2/bertattack_3_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 62.2, + "origin_prompt": "Assess the grammatical structure of the given sentence and classify it as 'Acceptable' or 'Unacceptable'.", + "atk_acc": 3.8, + "atk_prompt": "Assess the context t of the of sentence and judge it as 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 86.2, + "transfer_atk_acc": 84.2 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 49.6, + "origin_prompt": "As a tool for determining grammatical correctness, review the sentence and categorize its grammar as either 'acceptable' or 'unacceptable'.", + "atk_acc": 0.0, + "atk_prompt": "should a be for calculating adjective correctness, review the sentence and categorize its i as or 'acceptable' or 'unacceptable'. ", + "transfer_ori_acc": 85.6, + "transfer_atk_acc": 85.0 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 69.36, + "origin_prompt": "Determine if the meanings of the following sentences are semantically equivalent by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 33.33, + "atk_prompt": "Determine if the words of the following sentence are semanticdouble equivalent by thinking with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 84.55882352941177, + "transfer_atk_acc": 83.82352941176471 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 68.14, + "origin_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 17.16, + "atk_prompt": "to the role of a face comparison instance, assess the following sentences and indicate if they describe the same person by selecting 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 86.27450980392157, + "transfer_atk_acc": 85.7843137254902 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 41.2, + "origin_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "Based on the information in the context, decide if the answer to the is is committed by versus 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 93.8, + "transfer_atk_acc": 93.89999999999999 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 35.5, + "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "As a textual inference expert, write if the answer to the task can be deduced from the supplied instance and search 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 93.0, + "transfer_atk_acc": 93.60000000000001 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 41.8, + "origin_prompt": "Do the following expressions mean the same thing? Provide your answer as 'equivalent' or 'not_equivalent'.", + "atk_acc": 10.3, + "atk_prompt": "are the following expression do the same as? substitute your subject as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 88.1, + "transfer_atk_acc": 88.0 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 33.0, + "origin_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions.", + "atk_acc": 8.2, + "atk_prompt": "In the role of a topic similarities analyzer, examining the semantic of the questions the and decide if they are equivalent, answer with 'equivalent' for equivalent numbers or 'not_equivalent' for various respondents. ", + "transfer_ori_acc": 88.1, + "transfer_atk_acc": 88.3 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 50.54, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 1.08, + "atk_prompt": "pair the following opposite sentences entailment or not_entailment? answers me with \"entailment\" or \"not_entailment\", just 1 word. ", + "transfer_ori_acc": 93.14079422382672, + "transfer_atk_acc": 93.14079422382672 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 36.82, + "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "In the role of a descriptive relationship analyzeing, read the connection between the given sentences and learn if they exhibit entailment or not_entailment. answers with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 93.14079422382672, + "transfer_atk_acc": 93.50180505415162 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 12.5, + "origin_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative'.", + "atk_acc": 0.0, + "atk_prompt": "After examining the following action, type its emotion as either 'positive' or 'negative'. ", + "transfer_ori_acc": 92.77522935779817, + "transfer_atk_acc": 93.69266055045871 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 33.37, + "origin_prompt": "Taking on the role of an emotion classifier, specify if the provided phrase is 'positive' or 'negative'.", + "atk_acc": 0.11, + "atk_prompt": "[UNK] on the relationship of an emotion classtoken, choose if the provided phrase is 'positive' or 'negative'. ", + "transfer_ori_acc": 95.18348623853211, + "transfer_atk_acc": 95.29816513761467 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 23.94, + "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? q with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 76.05633802816901, + "transfer_atk_acc": 76.05633802816901 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 16.9, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "[UNK] your role as an entailment and tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 77.46478873239437, + "transfer_atk_acc": 77.46478873239437 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 8.51, + "origin_prompt": "Evaluate the multiple-choice question about {} and select the most fitting response from 'A', 'B', 'C', or 'D'.", + "atk_acc": 3.55, + "atk_prompt": "i the multiple-a asked about {} and select the most fitting response from 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 53.90070921985816, + "transfer_atk_acc": 53.90070921985816 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 9.22, + "origin_prompt": "With your understanding of {}, kindly answer the subsequent multiple-choice question by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 3.37, + "atk_prompt": "With your understand of {}, self answer the subsequent multiple-chosen questions by stating 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 53.72340425531915, + "transfer_atk_acc": 53.191489361702125 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_google-flan-ul2/checklist_3_shot.json b/results_transfer/vicuna-13b_google-flan-ul2/checklist_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..1dd70e915a1f22c7f2749a8f6f1f3cce836d6ab8 --- /dev/null +++ b/results_transfer/vicuna-13b_google-flan-ul2/checklist_3_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 62.2, + "origin_prompt": "Assess the grammatical structure of the given sentence and classify it as 'Acceptable' or 'Unacceptable'.", + "atk_acc": 17.4, + "atk_prompt": "Assess the grammatical structure of the given sentence and classify it as 'Acceptable' or 'Unacceptable' Q47Jmd4lMV. ", + "transfer_ori_acc": 86.2, + "transfer_atk_acc": 86.3 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 35.2, + "origin_prompt": "Working as a grammar classifier, identify whether the given sentence has correct grammar, and respond with 'acceptable' for correct sentences or 'unacceptable' for incorrect ones.", + "atk_acc": 2.4, + "atk_prompt": "Working as a grammar classifier, identify whether the given sentence has correct grammar, and respond with 'acceptable' for correct sentences or 'unacceptable' for incorrect ones yh912BU8T0. ", + "transfer_ori_acc": 85.9, + "transfer_atk_acc": 86.3 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 69.36, + "origin_prompt": "Determine if the meanings of the following sentences are semantically equivalent by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 62.01, + "atk_prompt": "Determine if the meanings of the following sentences are semantically equivalent by responding with 'equivalent' or 'not_equivalent' SFPCqpiClT. ", + "transfer_ori_acc": 84.55882352941177, + "transfer_atk_acc": 84.31372549019608 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 68.14, + "origin_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 58.09, + "atk_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent' xikCjkMydH. ", + "transfer_ori_acc": 86.27450980392157, + "transfer_atk_acc": 86.27450980392157 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 41.2, + "origin_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 1.1, + "atk_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment' zq0DcZ5dnI. ", + "transfer_ori_acc": 93.8, + "transfer_atk_acc": 93.7 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 41.4, + "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", + "atk_acc": 1.0, + "atk_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment' d6ZQ3u0GBQ. ", + "transfer_ori_acc": 94.0, + "transfer_atk_acc": 93.89999999999999 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 38.2, + "origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 23.5, + "atk_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent' 6a4Yn3RGVc. ", + "transfer_ori_acc": 88.0, + "transfer_atk_acc": 87.8 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 32.9, + "origin_prompt": "Working as a question classifier, identify whether the given questions share the same meaning, and respond with 'equivalent' for equivalent questions or 'not_equivalent' for different ones.", + "atk_acc": 25.0, + "atk_prompt": "Working as a question classifier, identify whether the given questions share the same meaning, and respond with 'equivalent' for equivalent questions or 'not_equivalent' for different ones AMsRIKZniY. ", + "transfer_ori_acc": 88.6, + "transfer_atk_acc": 88.5 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 43.68, + "origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.", + "atk_acc": 7.22, + "atk_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment' mzjVogkRhM. ", + "transfer_ori_acc": 92.7797833935018, + "transfer_atk_acc": 92.7797833935018 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 38.63, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 0.72, + "atk_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment' Q47Jmd4lMV. ", + "transfer_ori_acc": 92.7797833935018, + "transfer_atk_acc": 92.7797833935018 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 14.45, + "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment.", + "atk_acc": 0.11, + "atk_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment w52rwgo0Av. ", + "transfer_ori_acc": 96.78899082568807, + "transfer_atk_acc": 96.67431192660551 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 26.38, + "origin_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement.", + "atk_acc": 2.75, + "atk_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement Q47Jmd4lMV. ", + "transfer_ori_acc": 96.67431192660551, + "transfer_atk_acc": 96.78899082568807 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 28.17, + "origin_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment'.", + "atk_acc": 8.45, + "atk_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment' rzu98kF2KF. ", + "transfer_ori_acc": 77.46478873239437, + "transfer_atk_acc": 77.46478873239437 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 16.9, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 2.82, + "atk_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment' rzu98kF2KF. ", + "transfer_ori_acc": 76.05633802816901, + "transfer_atk_acc": 76.05633802816901 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 10.15, + "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 5.02, + "atk_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D' OB4KVJzIft. ", + "transfer_ori_acc": 53.54609929078015, + "transfer_atk_acc": 53.90070921985816 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 9.45, + "origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 4.43, + "atk_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D' w52rwgo0Av.", + "transfer_ori_acc": 53.01418439716312, + "transfer_atk_acc": 52.659574468085104 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_google-flan-ul2/deepwordbug_0_shot.json b/results_transfer/vicuna-13b_google-flan-ul2/deepwordbug_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..a9ad34de81521516ff8fc7a979f4a252d65472c5 --- /dev/null +++ b/results_transfer/vicuna-13b_google-flan-ul2/deepwordbug_0_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 69.2, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':", + "atk_acc": 3.6, + "atk_prompt": "sAsess the ofllowing lsentence and determine if it is grammatiaclly correct. Rspond with 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 86.2, + "transfer_atk_acc": 86.3 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 50.6, + "origin_prompt": "In the role of a syntax analyzer, examine the grammar of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorrect ones:", + "atk_acc": 4.8, + "atk_prompt": "n the role of a syntax analyzer, examine the grammar of the provided sentence and decide if it is correct, answering with 'acceptable' for grammatically correct sentences or 'unacceptable' for incorret oKnes:", + "transfer_ori_acc": 85.6, + "transfer_atk_acc": 85.8 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 57.84, + "origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.0, + "atk_prompt": "oCmpare the follAwing sentewces and determine if they share the same swmantic meaning by responding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 86.02941176470588, + "transfer_atk_acc": 86.02941176470588 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 52.21, + "origin_prompt": "In your capacity as a semantic comparison tool, examine the given sentences and decide if their meanings are identical, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 1.47, + "atk_prompt": "IYn your capaciMy as a semantic comparison ool, examine the given sentences and deXide if their meanings are identRcal, rseponding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.00980392156863, + "transfer_atk_acc": 86.51960784313727 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 30.5, + "origin_prompt": "Review the given context and question, and decide if the context contains enough information to support the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 4.7, + "atk_prompt": "Rview the gven cnotext and question, and edcide if the context containZs enough information to Vsupport the ainswer by selecting 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 94.1, + "transfer_atk_acc": 94.19999999999999 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 35.3, + "origin_prompt": "As a language expert, assess if the given context entails the answer to the question and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 2.1, + "atk_prompt": "As a language epxert, arssess if the gien contet entaxls the ansGer to the questAon and resNpond with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 94.19999999999999, + "transfer_atk_acc": 94.5 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 24.1, + "origin_prompt": "Analyze if the given set of sentences have the same connotation by answering with 'equivalent' or 'not_equivalent'.", + "atk_acc": 2.0, + "atk_prompt": "tAnalyze if the giCen est of sentUnces have the same connotatioq by answering with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.6, + "transfer_atk_acc": 88.6 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 16.9, + "origin_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions.", + "atk_acc": 0.7, + "atk_prompt": "IZn the role of a questoion similarity naalyzer, examin the meJaning of the provided questions and Uecide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for dSfferent questioXns. ", + "transfer_ori_acc": 88.1, + "transfer_atk_acc": 88.5 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 41.16, + "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 1.08, + "atk_prompt": "DetermiCne if the givePn pir of sentcnces disWlays entailment or not_entailment. Rtspond with 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 93.50180505415162, + "transfer_atk_acc": 93.50180505415162 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 49.82, + "origin_prompt": "Working as an entailment classifier, identify whether the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment':", + "atk_acc": 1.08, + "atk_prompt": "orking as an entailment clasifier, idenStify whethre the givec ptair of sentInces displays entailment or not_entailment. ReKspond with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 94.58483754512635, + "transfer_atk_acc": 93.14079422382672 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 48.05, + "origin_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative':", + "atk_acc": 0.8, + "atk_prompt": "kfter eximining the followinM expression, labe its emolion as egther 'positive' or 'negative': ", + "transfer_ori_acc": 91.97247706422019, + "transfer_atk_acc": 95.87155963302753 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 41.06, + "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:", + "atk_acc": 27.18, + "atk_prompt": "Functionnig as a sentiment identification tool, assess if the flolowing xepression is 'positive' or 'negative'. ClUssify: ", + "transfer_ori_acc": 95.9862385321101, + "transfer_atk_acc": 96.3302752293578 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 43.66, + "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "VDetermine if the giPen Apair of setences displUays entailment or not_entailment. QRespond with 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 78.87323943661971, + "transfer_atk_acc": 80.28169014084507 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 43.66, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment':", + "atk_acc": 0.0, + "atk_prompt": "kn your rohe as an entailment analysis tool, Zassess the relationship between the gUiven saentences and clasFsify it as 'entailment' or 'not_entailment':", + "transfer_ori_acc": 78.87323943661971, + "transfer_atk_acc": 77.46478873239437 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 25.67, + "origin_prompt": "Evaluate the multiple-choice question about {} and select the most fitting response from 'A', 'B', 'C', or 'D'.", + "atk_acc": 14.7, + "atk_prompt": "Evaluate the multple-choice question about {} and elect the most fitting respone from 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 53.01418439716312, + "transfer_atk_acc": 52.4822695035461 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 25.09, + "origin_prompt": "As a knowledgeable individual in {}, provide your response to the multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 16.45, + "atk_prompt": "As a kAnowledgeable indviidual in {}, provide your ersponse to the multipole-choice quesxion by choosing 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 53.54609929078015, + "transfer_atk_acc": 53.01418439716312 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_google-flan-ul2/deepwordbug_3_shot.json b/results_transfer/vicuna-13b_google-flan-ul2/deepwordbug_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..b6c8f28a239f479d8f56c5ab7261a3f05ef4f2e4 --- /dev/null +++ b/results_transfer/vicuna-13b_google-flan-ul2/deepwordbug_3_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 62.0, + "origin_prompt": "Assess the grammatical structure of the given sentence and classify it as 'Acceptable' or 'Unacceptable'.", + "atk_acc": 11.4, + "atk_prompt": "Assess the grammatcal etructure of the given entence and classif it as 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 86.2, + "transfer_atk_acc": 86.5 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 35.1, + "origin_prompt": "Working as a grammar classifier, identify whether the given sentence has correct grammar, and respond with 'acceptable' for correct sentences or 'unacceptable' for incorrect ones.", + "atk_acc": 0.0, + "atk_prompt": "Working as a grammar clsasifier, identify whether the given sentence has correct grammar, and reszond with 'acceptable' for correct sentences or 'unacceptable' for incorrect noes. ", + "transfer_ori_acc": 85.9, + "transfer_atk_acc": 86.2 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 67.65, + "origin_prompt": "Do the meanings of these two statements align? Indicate your answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 36.76, + "atk_prompt": "Zo the meanings of these Xwo statements Ualign? IndicNte your answeY with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 85.7843137254902, + "transfer_atk_acc": 85.5392156862745 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 69.12, + "origin_prompt": "In your capacity as a semantic assessment tool, evaluate the provided sentences and classify their meanings as 'equivalent' or 'not_equivalent'.", + "atk_acc": 34.31, + "atk_prompt": "nI your capacitCy as a sebmantic assgssment otol, evaluate the provided sentenceAs and Zlassify their meaniPgs as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 86.51960784313727, + "transfer_atk_acc": 84.80392156862744 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 42.7, + "origin_prompt": "Review the given context and question, and decide if the context contains enough information to support the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 0.2, + "atk_prompt": "RevTiew the ginen cEntext and uestion, and decid if the context conatains enough iformation to uspport the answVer by selectin 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 94.1, + "transfer_atk_acc": 94.1 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 42.9, + "origin_prompt": "As a linguistic consultant, decide if the answer to the question is logically supported by the provided context and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 1.8, + "atk_prompt": "Abs a linguistic consultant, decide if the answer to the question is logically supported by the provided Aontext and respGond with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 93.10000000000001, + "transfer_atk_acc": 93.30000000000001 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 38.1, + "origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 20.3, + "atk_prompt": "Asess hwether the following statemets are identiacl in meaning by answerinEg 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 88.0, + "transfer_atk_acc": 88.1 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 33.6, + "origin_prompt": "In your role as a question comparison tool, assess the following pair of questions and classify them as 'equivalent' or 'not_equivalent'.", + "atk_acc": 23.6, + "atk_prompt": "In your Irole as a questioGn comparison tolo, ussess the foMlowing pair of questrions and classify them as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 88.7, + "transfer_atk_acc": 88.1 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 50.54, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 0.36, + "atk_prompt": "Aer the following wto sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one wor. ", + "transfer_ori_acc": 93.14079422382672, + "transfer_atk_acc": 93.14079422382672 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 36.82, + "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 1.08, + "atk_prompt": "In the role of a semgntic relationship anayzer, examin the connection between the given sentences and decide if they exhibit entailment or not_entailment. AnCwer with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 93.14079422382672, + "transfer_atk_acc": 93.86281588447653 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 14.45, + "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment.", + "atk_acc": 0.0, + "atk_prompt": "GReview this stateent and decide wdether it has a 'positive' or 'negative' sentiment. ", + "transfer_ori_acc": 96.78899082568807, + "transfer_atk_acc": 96.78899082568807 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 26.49, + "origin_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement.", + "atk_acc": 0.0, + "atk_prompt": "In the ole of a senteiment naalysis tool, resond with 'positive' or 'negative' to classfy this sattement. ", + "transfer_ori_acc": 96.67431192660551, + "transfer_atk_acc": 96.67431192660551 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 23.94, + "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "SDoes the relationship between the given stentences represent entailment or not_entailment? ResYond with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 76.05633802816901, + "transfer_atk_acc": 77.46478873239437 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 16.9, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "mn your roye as an entailment aunalysis tooS, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 77.46478873239437, + "transfer_atk_acc": 76.05633802816901 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 10.15, + "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 3.5, + "atk_prompt": "oIn relatin to the mhltiple-choice question on {}, plese provide the accurate answer by choosing 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 53.54609929078015, + "transfer_atk_acc": 53.191489361702125 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 9.8, + "origin_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 6.07, + "atk_prompt": "With your knowlegde of {}, tacke the folloing mulBiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 53.72340425531915, + "transfer_atk_acc": 53.01418439716312 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_google-flan-ul2/stresstest_0_shot.json b/results_transfer/vicuna-13b_google-flan-ul2/stresstest_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..805c9707b1b906ef2159d5abd73918b6a24fc8bc --- /dev/null +++ b/results_transfer/vicuna-13b_google-flan-ul2/stresstest_0_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 69.0, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':", + "atk_acc": 34.8, + "atk_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable' and false is not true :", + "transfer_ori_acc": 86.2, + "transfer_atk_acc": 82.19999999999999 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 57.1, + "origin_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable':", + "atk_acc": 20.8, + "atk_prompt": "Functioning as a grammar evaluation tool, analyze the given sentence and decide if it is grammatically correct, responding with 'acceptable' or 'unacceptable' and false is not true :", + "transfer_ori_acc": 85.9, + "transfer_atk_acc": 48.199999999999996 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 58.09, + "origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 56.37, + "atk_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 86.02941176470588, + "transfer_atk_acc": 85.7843137254902 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 52.21, + "origin_prompt": "In your capacity as a semantic comparison tool, examine the given sentences and decide if their meanings are identical, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 33.33, + "atk_prompt": "In your capacity as a semantic comparison tool, examine the given sentences and decide if their meanings are identical, responding with 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 87.00980392156863, + "transfer_atk_acc": 85.29411764705883 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 30.9, + "origin_prompt": "Based on the provided context and question, decide if the information supports the answer by responding with 'entailment' or 'not_entailment'.", + "atk_acc": 16.2, + "atk_prompt": "Based on the provided context and question, decide if the information supports the answer by responding with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 94.8, + "transfer_atk_acc": 94.6 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 35.3, + "origin_prompt": "As a language expert, assess if the given context entails the answer to the question and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 19.8, + "atk_prompt": "As a language expert, assess if the given context entails the answer to the question and respond with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 94.19999999999999, + "transfer_atk_acc": 94.3 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 23.9, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 27.7, + "atk_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 87.4, + "transfer_atk_acc": 87.5 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 21.1, + "origin_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions.", + "atk_acc": 21.5, + "atk_prompt": "As an instrument for question comparison evaluation, consider the questions and determine if their meaning is the same, responding with 'equivalent' for similar questions or 'not_equivalent' for different questions and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 88.3, + "transfer_atk_acc": 88.5 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 47.65, + "origin_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment'.", + "atk_acc": 23.47, + "atk_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true .", + "transfer_ori_acc": 93.14079422382672, + "transfer_atk_acc": 91.69675090252709 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 48.01, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment':", + "atk_acc": 40.07, + "atk_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true :", + "transfer_ori_acc": 92.7797833935018, + "transfer_atk_acc": 91.33574007220217 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 53.9, + "origin_prompt": "Please identify the emotional tone of this passage: 'positive' or 'negative'?", + "atk_acc": 17.89, + "atk_prompt": "Please identify the emotional tone of this passage: 'positive' or 'negative' and true is true and true is true and true is true and true is true and true is true ? ", + "transfer_ori_acc": 96.10091743119266, + "transfer_atk_acc": 95.87155963302753 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 40.94, + "origin_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify:", + "atk_acc": 36.47, + "atk_prompt": "Functioning as a sentiment identification tool, assess if the following expression is 'positive' or 'negative'. Classify and false is not true : ", + "transfer_ori_acc": 95.9862385321101, + "transfer_atk_acc": 91.39908256880734 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 38.03, + "origin_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 14.08, + "atk_prompt": "Assess the connection between the following sentences and classify it as 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true .", + "transfer_ori_acc": 80.28169014084507, + "transfer_atk_acc": 77.46478873239437 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 43.66, + "origin_prompt": "Functioning as an entailment evaluation tool, analyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment':", + "atk_acc": 32.39, + "atk_prompt": "Functioning as an entailment evaluation tool, analyze the provided sentences and decide if their relationship is 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true :", + "transfer_ori_acc": 78.87323943661971, + "transfer_atk_acc": 76.05633802816901 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 25.09, + "origin_prompt": "Answer the subsequent multiple-choice question about {} by picking the right option among 'A', 'B', 'C', or 'D'.", + "atk_acc": 19.49, + "atk_prompt": "Answer the subsequent multiple-choice question about {} by picking the right option among 'A', 'B', 'C', or 'D' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 52.4822695035461, + "transfer_atk_acc": 53.36879432624113 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 24.39, + "origin_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 12.72, + "atk_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D' and true is true and true is true and true is true and true is true and true is true .", + "transfer_ori_acc": 52.12765957446809, + "transfer_atk_acc": 53.191489361702125 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_google-flan-ul2/stresstest_3_shot.json b/results_transfer/vicuna-13b_google-flan-ul2/stresstest_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..e28f5e096237772ba343f372809bdea862f5862d --- /dev/null +++ b/results_transfer/vicuna-13b_google-flan-ul2/stresstest_3_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 47.7, + "origin_prompt": "Examine the sentence and decide if its grammar is 'Acceptable' or 'Unacceptable'.", + "atk_acc": 10.6, + "atk_prompt": "Examine the sentence and decide if its grammar is 'Acceptable' or 'Unacceptable' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 85.9, + "transfer_atk_acc": 86.0 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 35.2, + "origin_prompt": "Working as a grammar classifier, identify whether the given sentence has correct grammar, and respond with 'acceptable' for correct sentences or 'unacceptable' for incorrect ones.", + "atk_acc": 0.9, + "atk_prompt": "Working as a grammar classifier, identify whether the given sentence has correct grammar, and respond with 'acceptable' for correct sentences or 'unacceptable' for incorrect ones and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 85.9, + "transfer_atk_acc": 86.0 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 67.65, + "origin_prompt": "Do the meanings of these two statements align? Indicate your answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 67.4, + "atk_prompt": "Do the meanings of these two statements align? Indicate your answer with 'equivalent' or 'not_equivalent' and false is not true . ", + "transfer_ori_acc": 85.7843137254902, + "transfer_atk_acc": 85.29411764705883 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 68.14, + "origin_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 59.8, + "atk_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent' and false is not true . ", + "transfer_ori_acc": 86.27450980392157, + "transfer_atk_acc": 86.02941176470588 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 41.2, + "origin_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 6.7, + "atk_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 93.8, + "transfer_atk_acc": 94.39999999999999 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 42.9, + "origin_prompt": "As a linguistic consultant, decide if the answer to the question is logically supported by the provided context and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 7.0, + "atk_prompt": "As a linguistic consultant, decide if the answer to the question is logically supported by the provided context and respond with 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 93.10000000000001, + "transfer_atk_acc": 93.10000000000001 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 38.1, + "origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 41.2, + "atk_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 88.0, + "transfer_atk_acc": 87.7 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 33.0, + "origin_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions.", + "atk_acc": 26.4, + "atk_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 88.1, + "transfer_atk_acc": 88.4 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 50.54, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 14.08, + "atk_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word and true is true . ", + "transfer_ori_acc": 93.14079422382672, + "transfer_atk_acc": 93.14079422382672 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 41.88, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 6.5, + "atk_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 92.4187725631769, + "transfer_atk_acc": 91.69675090252709 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 14.45, + "origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment.", + "atk_acc": 0.0, + "atk_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 96.78899082568807, + "transfer_atk_acc": 96.67431192660551 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 26.38, + "origin_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement.", + "atk_acc": 1.95, + "atk_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 96.67431192660551, + "transfer_atk_acc": 96.55963302752293 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 28.17, + "origin_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment'.", + "atk_acc": 1.41, + "atk_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 77.46478873239437, + "transfer_atk_acc": 76.05633802816901 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 16.9, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 2.82, + "atk_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment' and true is true . ", + "transfer_ori_acc": 76.05633802816901, + "transfer_atk_acc": 74.64788732394366 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 10.15, + "origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 11.67, + "atk_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D' and true is true and true is true and true is true and true is true and true is true . ", + "transfer_ori_acc": 53.54609929078015, + "transfer_atk_acc": 53.54609929078015 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 9.8, + "origin_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 10.85, + "atk_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D' and true is true and true is true and true is true and true is true and true is true .", + "transfer_ori_acc": 53.72340425531915, + "transfer_atk_acc": 53.54609929078015 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_google-flan-ul2/textbugger_0_shot.json b/results_transfer/vicuna-13b_google-flan-ul2/textbugger_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..513d6132fe29623ee54d6c7b41d3363232d45c34 --- /dev/null +++ b/results_transfer/vicuna-13b_google-flan-ul2/textbugger_0_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 69.0, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':", + "atk_acc": 0.0, + "atk_prompt": "Evaluating the later sen𝚝ence and determine if it is grammatically correct. Respoոd with 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 86.2, + "transfer_atk_acc": 86.1 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 54.9, + "origin_prompt": "As a tool for determining grammatical correctness, review the sentence and categorize its grammar as either 'acceptable' or 'unacceptable':", + "atk_acc": 0.0, + "atk_prompt": "Aѕ a tool for determining grammatical regularity, reviewing the penalties and categorizе its grammar as ei𝚝her 'acceptable' or 'unacceptable':", + "transfer_ori_acc": 85.7, + "transfer_atk_acc": 85.0 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 58.09, + "origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.49, + "atk_prompt": "Comрare the later sentences and determine if they share the same semantic meaning by respoոding with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 86.02941176470588, + "transfer_atk_acc": 86.27450980392157 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 53.43, + "origin_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.0, + "atk_prompt": "Iո the rloe of a sentence comparison analyst, asess the provided sentences and indicate if they convey the same meaning by selec𝚝ing 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 86.27450980392157, + "transfer_atk_acc": 86.02941176470588 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 30.5, + "origin_prompt": "Review the given context and question, and decide if the context contains enough information to support the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "Revisited the afforded contеxt and question, and decide if the context contains enough information to support the answer by selec𝚝ing 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 94.1, + "transfer_atk_acc": 94.19999999999999 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 38.4, + "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.", + "atk_acc": 0.1, + "atk_prompt": "Aѕ a textual deduced expeⲅt, analyzes if the replying to the question can be deduced from the provided context and selec𝚝 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 93.30000000000001, + "transfer_atk_acc": 93.5 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 23.9, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.2, + "atk_prompt": "Possible these two statemen𝚝s be considered equal in meaing? Answer with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.4, + "transfer_atk_acc": 88.2 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 16.9, + "origin_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions.", + "atk_acc": 0.0, + "atk_prompt": "Iո the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different ques𝚝ions. ", + "transfer_ori_acc": 88.1, + "transfer_atk_acc": 87.8 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 45.13, + "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 1.08, + "atk_prompt": "Ai the relationship between the given sentence represent entailment or not_entailment? Answer with 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 93.50180505415162, + "transfer_atk_acc": 93.86281588447653 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 51.26, + "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':", + "atk_acc": 0.36, + "atk_prompt": "Onto the role of a semantic relationship offing, e×amine the connection between the given seոtences and decіde if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 92.4187725631769, + "transfer_atk_acc": 92.05776173285199 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 48.05, + "origin_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative':", + "atk_acc": 2.06, + "atk_prompt": "After examining the following expression, label its emo𝚝ion as neither 'positive' or 'negative': ", + "transfer_ori_acc": 91.97247706422019, + "transfer_atk_acc": 88.30275229357798 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 37.5, + "origin_prompt": "As an emotion detector, determine if the provided passage conveys a 'positive' or 'negative' sentiment. Classify:", + "atk_acc": 1.03, + "atk_prompt": "Because an emotion detector, determine if the povided passgae convey a 'positive' or 'negative' ambience. Classi𝚏y: ", + "transfer_ori_acc": 95.87155963302753, + "transfer_atk_acc": 95.18348623853211 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 43.66, + "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 1.41, + "atk_prompt": "Deos the relationship between the given sentences represent entailment or not_entailment? Answering with 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 78.87323943661971, + "transfer_atk_acc": 78.87323943661971 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 43.66, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment':", + "atk_acc": 0.0, + "atk_prompt": "During your roⅼe as an entailment analysis tool, evaluating the relationship between the given sentences and classifying it as 'entailment' or 'not_entailment':", + "transfer_ori_acc": 78.87323943661971, + "transfer_atk_acc": 80.28169014084507 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 25.67, + "origin_prompt": "Evaluate the multiple-choice question about {} and select the most fitting response from 'A', 'B', 'C', or 'D'.", + "atk_acc": 9.92, + "atk_prompt": "Assess the mul𝚝iple-choice question about {} and seⅼect the most fitting response from 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 53.01418439716312, + "transfer_atk_acc": 53.54609929078015 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 24.39, + "origin_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.", + "atk_acc": 9.57, + "atk_prompt": "With your knowledge of {}, tackle the 𝚏ollowing multiple-choіce problem by choosing 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 52.12765957446809, + "transfer_atk_acc": 53.01418439716312 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_google-flan-ul2/textbugger_3_shot.json b/results_transfer/vicuna-13b_google-flan-ul2/textbugger_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..d4590c6713083b0760e89e48e5b963ae7e49c685 --- /dev/null +++ b/results_transfer/vicuna-13b_google-flan-ul2/textbugger_3_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 62.2, + "origin_prompt": "Assess the grammatical structure of the given sentence and classify it as 'Acceptable' or 'Unacceptable'.", + "atk_acc": 2.2, + "atk_prompt": "Assess the grammatically s𝚝ructure of the ɡiven sentecne and clasisfy it as 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 86.2, + "transfer_atk_acc": 86.4 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 48.0, + "origin_prompt": "While performing grammar analysis, classify the grammar of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammar.", + "atk_acc": 0.0, + "atk_prompt": "Whie peⲅforming grammar analysis, ϲlassify the grammatical of the following sentence as 'acceptable' for correct grammar or 'unacceptable' for incorrect grammr. ", + "transfer_ori_acc": 86.4, + "transfer_atk_acc": 86.2 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 68.38, + "origin_prompt": "Assess if the two given sentences have equivalent meanings by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 44.61, + "atk_prompt": "Assessing if the t wo given sentences have equivalent meanings by selectiոg 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 84.06862745098039, + "transfer_atk_acc": 85.5392156862745 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 68.14, + "origin_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 44.36, + "atk_prompt": "Onto the role of a senteոce c omparison analys𝚝, assess the given sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 86.27450980392157, + "transfer_atk_acc": 86.27450980392157 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 38.9, + "origin_prompt": "Analyze the context and question, and indicate if the context entails the answer by choosing 'entailment' or 'not_entailment'.", + "atk_acc": 0.7, + "atk_prompt": "Analyze the context and question, and depicts if the context involves the ans wer by choosiոg 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 94.0, + "transfer_atk_acc": 93.89999999999999 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 41.4, + "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "As a semantic reesarcher, evaluating whether the provided context supports the answering to the question and chosen 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 94.0, + "transfer_atk_acc": 93.60000000000001 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 41.8, + "origin_prompt": "Do the following expressions mean the same thing? Provide your answer as 'equivalent' or 'not_equivalent'.", + "atk_acc": 26.7, + "atk_prompt": "Wo the following expressions meant the same thing? Provide your aոswer as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 88.1, + "transfer_atk_acc": 88.1 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 33.6, + "origin_prompt": "In your role as a question comparison tool, assess the following pair of questions and classify them as 'equivalent' or 'not_equivalent'.", + "atk_acc": 15.0, + "atk_prompt": "For your roⅼe as a question compɑrison tool, evaluate the 𝚏ollowing pair of questions and categorized them as 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 88.7, + "transfer_atk_acc": 89.0 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 43.68, + "origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.", + "atk_acc": 2.53, + "atk_prompt": "Revіew the two sta𝚝ements and categorize their relationship as or 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 92.7797833935018, + "transfer_atk_acc": 92.05776173285199 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 36.82, + "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "In the role of a semantic relationship analyzeⲅ, examine the connection between the givn sentences and determine if they exhibi𝚝 entailment or not_entailment. Answers with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 93.14079422382672, + "transfer_atk_acc": 92.7797833935018 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 18.12, + "origin_prompt": "Evaluate the sentiment of the given text and classify it as 'positive' or 'negative'.", + "atk_acc": 0.0, + "atk_prompt": "Evaluate the vibe of the gievn text and calssify it as 'positive' or 'negative'. ", + "transfer_ori_acc": 96.78899082568807, + "transfer_atk_acc": 96.78899082568807 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 33.37, + "origin_prompt": "Taking on the role of an emotion classifier, specify if the provided phrase is 'positive' or 'negative'.", + "atk_acc": 0.57, + "atk_prompt": "Taking on the roles of an emotions classi𝚏ier, speci𝚏y if the povided phrase is 'positive' or 'negative'. ", + "transfer_ori_acc": 95.18348623853211, + "transfer_atk_acc": 95.06880733944955 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 23.94, + "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "Does the relations between the given sentences ⲅepresent entailment or not_entailment? Respоnd with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 76.05633802816901, + "transfer_atk_acc": 77.46478873239437 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 16.9, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 1.41, + "atk_prompt": "During your role as an entailment aalysis tool, assess the relationshiр between the given sentences and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 77.46478873239437, + "transfer_atk_acc": 74.64788732394366 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 9.1, + "origin_prompt": "Answer the subsequent multiple-choice question about {} by picking the right option among 'A', 'B', 'C', or 'D'.", + "atk_acc": 3.5, + "atk_prompt": "Responding the subequent multipl-echoice question about {} by pick the right op𝚝ion amongst 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 53.72340425531915, + "transfer_atk_acc": 53.90070921985816 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 9.68, + "origin_prompt": "With your understanding of {}, kindly answer the subsequent multiple-choice question by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 2.45, + "atk_prompt": "Witհ your understand of {}, 𝒌indly answer the successive mսltiple-choice question by selecting 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 53.72340425531915, + "transfer_atk_acc": 53.54609929078015 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_google-flan-ul2/textfooler_0_shot.json b/results_transfer/vicuna-13b_google-flan-ul2/textfooler_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..7a73d5652382fc19ec162ef45f86224fb343e04b --- /dev/null +++ b/results_transfer/vicuna-13b_google-flan-ul2/textfooler_0_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 69.0, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':", + "atk_acc": 14.0, + "atk_prompt": "Assess the next sentence and decide if it is grammatically correct. Accommodate with 'Acceptable' or 'Unacceptable':", + "transfer_ori_acc": 86.2, + "transfer_atk_acc": 85.3 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 54.9, + "origin_prompt": "As a tool for determining grammatical correctness, review the sentence and categorize its grammar as either 'acceptable' or 'unacceptable':", + "atk_acc": 0.0, + "atk_prompt": "Oj a toolbox for defines pronouns correcting, revisions the penalties and sorting its dictionary as either 'acceptable' or 'unacceptable':", + "transfer_ori_acc": 85.7, + "transfer_atk_acc": 83.7 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 67.16, + "origin_prompt": "Are the meanings of the following pair of sentences the same? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.49, + "atk_prompt": "Are the meanings of the following couple of sentences the same? Cope with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.74509803921569, + "transfer_atk_acc": 87.25490196078431 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 39.46, + "origin_prompt": "In the role of a textual comparison specialist, examine the given sentences and decide if they share the same meaning, responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.0, + "atk_prompt": "About the role of a textual comparison specialist, examine the given judgements and decide if they exchanging the same meaning, responses with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.25490196078431, + "transfer_atk_acc": 84.31372549019608 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 30.5, + "origin_prompt": "Review the given context and question, and decide if the context contains enough information to support the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "Reconsidered the given context and question, and ascertain if the context comprises enough information to favour the answer by choices 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 94.1, + "transfer_atk_acc": 94.39999999999999 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 35.3, + "origin_prompt": "As a language expert, assess if the given context entails the answer to the question and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 0.6, + "atk_prompt": "As a language expert, weighed if the delivered framework translates the answer to the question and respondent with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 94.19999999999999, + "transfer_atk_acc": 94.19999999999999 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 23.9, + "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.0, + "atk_prompt": "Kan these two languages happen revisited equitable in signify? Meet with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 87.4, + "transfer_atk_acc": 87.6 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 16.9, + "origin_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions.", + "atk_acc": 0.1, + "atk_prompt": "Onto the role of a question similarity analyzer, consider the significance of the envisaged matters and identify if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for many matters. ", + "transfer_ori_acc": 88.1, + "transfer_atk_acc": 87.9 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 47.65, + "origin_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment'.", + "atk_acc": 0.72, + "atk_prompt": "Invite classed the coitus between the foreseen priors as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 93.14079422382672, + "transfer_atk_acc": 91.33574007220217 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 48.01, + "origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment':", + "atk_acc": 0.0, + "atk_prompt": "Because an entailment characterization regimes, considering the login between the following sentence and meet with 'entailment' or 'not_entailment':", + "transfer_ori_acc": 92.7797833935018, + "transfer_atk_acc": 93.14079422382672 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 53.9, + "origin_prompt": "Please identify the emotional tone of this passage: 'positive' or 'negative'?", + "atk_acc": 5.85, + "atk_prompt": "Invited specify the affective tone of this pass: 'positive' or 'negative'? ", + "transfer_ori_acc": 96.10091743119266, + "transfer_atk_acc": 96.10091743119266 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 42.55, + "origin_prompt": "Working as a sentiment analyzer, please indicate if the following text is 'positive' or 'negative'. Classify:", + "atk_acc": 3.21, + "atk_prompt": "Working as a sentiment analyzer, please indicate if the following text is 'positive' or 'negative'. Ranked: ", + "transfer_ori_acc": 96.44495412844036, + "transfer_atk_acc": 96.3302752293578 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 43.66, + "origin_prompt": "Determine if the given pair of sentences displays entailment or not_entailment. Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 1.41, + "atk_prompt": "Define if the given pair of priors exhibits entailment or not_entailment. Answers with 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 78.87323943661971, + "transfer_atk_acc": 77.46478873239437 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 43.66, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment':", + "atk_acc": 0.0, + "atk_prompt": "Onto your role as an entailment analysing tool, appraisals the relationship between the given condemnation and classed it as 'entailment' or 'not_entailment':", + "transfer_ori_acc": 78.87323943661971, + "transfer_atk_acc": 78.87323943661971 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 27.07, + "origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 12.72, + "atk_prompt": "Among all multiple-choice subject about {}, find the accurate answer by chooses 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 51.77304964539007, + "transfer_atk_acc": 52.4822695035461 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 24.85, + "origin_prompt": "As an expert in {}, respond to the following multiple-choice question by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 13.89, + "atk_prompt": "As an expert in {}, answering to the following multiple-choice things by picking 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 53.191489361702125, + "transfer_atk_acc": 52.304964539007095 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_google-flan-ul2/textfooler_3_shot.json b/results_transfer/vicuna-13b_google-flan-ul2/textfooler_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..307c232c2461765d0210d20b74c6df4a5a0b432a --- /dev/null +++ b/results_transfer/vicuna-13b_google-flan-ul2/textfooler_3_shot.json @@ -0,0 +1,162 @@ +[ + { + "dataset": "cola", + "type": "task", + "origin_acc": 47.7, + "origin_prompt": "Examine the sentence and decide if its grammar is 'Acceptable' or 'Unacceptable'.", + "atk_acc": 8.0, + "atk_prompt": "Discuss the sentencing and choosing if its language is 'Acceptable' or 'Unacceptable'. ", + "transfer_ori_acc": 85.9, + "transfer_atk_acc": 84.3 + }, + { + "dataset": "cola", + "type": "role", + "origin_acc": 35.2, + "origin_prompt": "Working as a grammar classifier, identify whether the given sentence has correct grammar, and respond with 'acceptable' for correct sentences or 'unacceptable' for incorrect ones.", + "atk_acc": 0.0, + "atk_prompt": "Working as a grammar classifier, identify whether the given sentence has correct jargon, and cope with 'acceptable' for correct punishments or 'unacceptable' for inappropriate ones. ", + "transfer_ori_acc": 85.9, + "transfer_atk_acc": 85.7 + }, + { + "dataset": "mrpc", + "type": "task", + "origin_acc": 69.36, + "origin_prompt": "Determine if the meanings of the following sentences are semantically equivalent by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 35.54, + "atk_prompt": "Decide if the senses of the following sentence are semantically equivalent by meet with 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 84.55882352941177, + "transfer_atk_acc": 85.04901960784314 + }, + { + "dataset": "mrpc", + "type": "role", + "origin_acc": 68.14, + "origin_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'.", + "atk_acc": 13.24, + "atk_prompt": "Towards the feature of a judgements versus psychoanalyst, reviews the supplied sentences and clarified if they convey the same senses by electing 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 86.27450980392157, + "transfer_atk_acc": 85.7843137254902 + }, + { + "dataset": "qnli", + "type": "task", + "origin_acc": 42.7, + "origin_prompt": "Review the given context and question, and decide if the context contains enough information to support the answer by selecting 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "Discuss the given context and tura, and decide if the context encompasses enough information to support the satisfy by takes 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 94.1, + "transfer_atk_acc": 94.1 + }, + { + "dataset": "qnli", + "type": "role", + "origin_acc": 41.4, + "origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", + "atk_acc": 0.1, + "atk_prompt": "As a semantic researcher, appraisals whether the provided context supports the answer to the question and taking 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 94.0, + "transfer_atk_acc": 93.60000000000001 + }, + { + "dataset": "qqp", + "type": "task", + "origin_acc": 38.1, + "origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.", + "atk_acc": 21.5, + "atk_prompt": "Estimates whether the following pronouncements are analogous in connotation by answering 'equivalent' or 'not_equivalent'. ", + "transfer_ori_acc": 88.0, + "transfer_atk_acc": 87.8 + }, + { + "dataset": "qqp", + "type": "role", + "origin_acc": 33.0, + "origin_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions.", + "atk_acc": 13.9, + "atk_prompt": "Across the rol of a trouble similarity analyzer, exploring the meaning of the lent questions and select if they are equivalent, answered with 'equivalent' for equivalent theme or 'not_equivalent' for various questions. ", + "transfer_ori_acc": 88.1, + "transfer_atk_acc": 88.6 + }, + { + "dataset": "rte", + "type": "task", + "origin_acc": 43.68, + "origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.", + "atk_acc": 2.17, + "atk_prompt": "Investigated the two statements and categorize their relation as either 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 92.7797833935018, + "transfer_atk_acc": 93.14079422382672 + }, + { + "dataset": "rte", + "type": "role", + "origin_acc": 36.82, + "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "In the role of a semantic relationship profiler, review the ties between the given sentences and determining if they fairs entailment or not_entailment. Answers with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 93.14079422382672, + "transfer_atk_acc": 92.05776173285199 + }, + { + "dataset": "sst2", + "type": "task", + "origin_acc": 18.12, + "origin_prompt": "Evaluate the sentiment of the given text and classify it as 'positive' or 'negative'.", + "atk_acc": 0.0, + "atk_prompt": "Assessment the mood of the made laws and rank it as 'positive' or 'negative'. ", + "transfer_ori_acc": 96.78899082568807, + "transfer_atk_acc": 96.78899082568807 + }, + { + "dataset": "sst2", + "type": "role", + "origin_acc": 26.38, + "origin_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement.", + "atk_acc": 0.0, + "atk_prompt": "During the rol of a emotion exploring mechanism, respond with 'positive' or 'negative' to rank this affirmations. ", + "transfer_ori_acc": 96.67431192660551, + "transfer_atk_acc": 96.44495412844036 + }, + { + "dataset": "wnli", + "type": "task", + "origin_acc": 23.94, + "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "Did the coitus between the given sentence represent entailment or not_entailment? Respondent with 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 76.05633802816901, + "transfer_atk_acc": 76.05633802816901 + }, + { + "dataset": "wnli", + "type": "role", + "origin_acc": 16.9, + "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'.", + "atk_acc": 0.0, + "atk_prompt": "Between your role as an entailment valuation appliance, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment'. ", + "transfer_ori_acc": 77.46478873239437, + "transfer_atk_acc": 76.05633802816901 + }, + { + "dataset": "mmlu", + "type": "task", + "origin_acc": 9.1, + "origin_prompt": "Answer the subsequent multiple-choice question about {} by picking the right option among 'A', 'B', 'C', or 'D'.", + "atk_acc": 3.73, + "atk_prompt": "Answer the consecutive multiple-choice questions about {} by choosing the ok option among 'A', 'B', 'C', or 'D'. ", + "transfer_ori_acc": 53.72340425531915, + "transfer_atk_acc": 53.90070921985816 + }, + { + "dataset": "mmlu", + "type": "role", + "origin_acc": 9.22, + "origin_prompt": "With your understanding of {}, kindly answer the subsequent multiple-choice question by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 4.61, + "atk_prompt": "With your understand of {}, gracefully answer the forth multiple-choice question by picking 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 53.72340425531915, + "transfer_atk_acc": 54.07801418439716 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_google-flan-ul2/translation_0_shot.json b/results_transfer/vicuna-13b_google-flan-ul2/translation_0_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..160111c5bae213684e0b9aaf82300c134c927aba --- /dev/null +++ b/results_transfer/vicuna-13b_google-flan-ul2/translation_0_shot.json @@ -0,0 +1,82 @@ +[ + { + "dataset": "cola", + "type": "translation", + "origin_acc": 69.0, + "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':", + "atk_acc": 5.8, + "atk_prompt": "As a tool for analyzing grammatical correctness, it categorizes the grammatical rules of the sentence as 'acceptable' or 'unacceptable'.", + "transfer_ori_acc": 86.2, + "transfer_atk_acc": 85.1 + }, + { + "dataset": "mrpc", + "type": "translation", + "origin_acc": 67.16, + "origin_prompt": "Are the meanings of the following pair of sentences the same? Answer with 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.74, + "atk_prompt": "Please determine whether the following two sentences are 'equivalent' or 'not_equivalent' according to the standard of semantic comparison.", + "transfer_ori_acc": 87.74509803921569, + "transfer_atk_acc": 84.06862745098039 + }, + { + "dataset": "qnli", + "type": "translation", + "origin_acc": 38.4, + "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.", + "atk_acc": 2.4, + "atk_prompt": "Determine whether the information provided in the context necessarily leads to the answer to the question asked and indicate 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 93.30000000000001, + "transfer_atk_acc": 94.89999999999999 + }, + { + "dataset": "qqp", + "type": "translation", + "origin_acc": 30.0, + "origin_prompt": "Please indicate if the following pair of sentences share the same meaning by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 0.5, + "atk_prompt": "Work on text sorting tasks labeled 'equivalent' or 'not_equivalent'.", + "transfer_ori_acc": 88.1, + "transfer_atk_acc": 88.6 + }, + { + "dataset": "rte", + "type": "translation", + "origin_acc": 51.26, + "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':", + "atk_acc": 5.42, + "atk_prompt": "Classify it as 'entailment' or 'not_entailment' based on the content of the sentence.", + "transfer_ori_acc": 92.4187725631769, + "transfer_atk_acc": 92.05776173285199 + }, + { + "dataset": "sst2", + "type": "translation", + "origin_acc": 53.9, + "origin_prompt": "Please identify the emotional tone of this passage: 'positive' or 'negative'?", + "atk_acc": 9.29, + "atk_prompt": "Please rate the emotion of this sentence and classify it as 'positive' or 'negative'.", + "transfer_ori_acc": 96.10091743119266, + "transfer_atk_acc": 95.06880733944955 + }, + { + "dataset": "wnli", + "type": "translation", + "origin_acc": 43.66, + "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.", + "atk_acc": 5.63, + "atk_prompt": "Determine whether the given sentences have a cause-effect relationship as an engagement analysis tool and categorize them as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 78.87323943661971, + "transfer_atk_acc": 74.64788732394366 + }, + { + "dataset": "mmlu", + "type": "translation", + "origin_acc": 27.07, + "origin_prompt": "For each multiple-choice question about {}, identify the correct answer by selecting 'A', 'B', 'C', or 'D'.", + "atk_acc": 18.09, + "atk_prompt": "For the multiple choice problem about, choose the correct answer for '{}' from 'A', 'B', 'C', or 'D'.", + "transfer_ori_acc": 51.77304964539007, + "transfer_atk_acc": 53.191489361702125 + } +] \ No newline at end of file diff --git a/results_transfer/vicuna-13b_google-flan-ul2/translation_3_shot.json b/results_transfer/vicuna-13b_google-flan-ul2/translation_3_shot.json new file mode 100644 index 0000000000000000000000000000000000000000..368294e28f618a275ee2b45a87d7d3561efd11e5 --- /dev/null +++ b/results_transfer/vicuna-13b_google-flan-ul2/translation_3_shot.json @@ -0,0 +1,82 @@ +[ + { + "dataset": "cola", + "type": "translation", + "origin_acc": 65.2, + "origin_prompt": "Determine if the grammar of the given sentence is 'Acceptable' or 'Unacceptable'.", + "atk_acc": 5.3, + "atk_prompt": "Please check that the following sentences are grammatically correct and categorize them as 'acceptable' or 'unacceptable'.", + "transfer_ori_acc": 86.1, + "transfer_atk_acc": 86.6 + }, + { + "dataset": "mrpc", + "type": "translation", + "origin_acc": 69.36, + "origin_prompt": "Determine if the meanings of the following sentences are semantically equivalent by responding with 'equivalent' or 'not_equivalent'.", + "atk_acc": 54.9, + "atk_prompt": "As a semantic comparison specialist, analyze the two data statements and insert them into one of the following categories: 'equivalent' or 'not_equivalent'.", + "transfer_ori_acc": 84.55882352941177, + "transfer_atk_acc": 85.7843137254902 + }, + { + "dataset": "qnli", + "type": "translation", + "origin_acc": 42.9, + "origin_prompt": "As a linguistic consultant, decide if the answer to the question is logically supported by the provided context and respond with 'entailment' or 'not_entailment'.", + "atk_acc": 1.6, + "atk_prompt": "Grasp the relations between sentences, determine if a given sentence necessarily contains the meaning of another sentence and respond with 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 93.10000000000001, + "transfer_atk_acc": 93.8 + }, + { + "dataset": "qqp", + "type": "translation", + "origin_acc": 41.8, + "origin_prompt": "Do the following expressions mean the same thing? Provide your answer as 'equivalent' or 'not_equivalent'.", + "atk_acc": 21.7, + "atk_prompt": "For text classification tasks, use the labels 'equivalent' or 'not_equivalent' to determine the equivalence of statements.", + "transfer_ori_acc": 88.1, + "transfer_atk_acc": 88.7 + }, + { + "dataset": "rte", + "type": "translation", + "origin_acc": 50.54, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 3.97, + "atk_prompt": "Using implication analysis, determine whether the given sentences have a cause-effect relationship and categorize them as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 93.14079422382672, + "transfer_atk_acc": 92.7797833935018 + }, + { + "dataset": "sst2", + "type": "translation", + "origin_acc": 33.37, + "origin_prompt": "Taking on the role of an emotion classifier, specify if the provided phrase is 'positive' or 'negative'.", + "atk_acc": 0.46, + "atk_prompt": "Please determine whether the emotion of the sentence is 'positive' or 'negative' and categorize it.", + "transfer_ori_acc": 95.18348623853211, + "transfer_atk_acc": 94.38073394495413 + }, + { + "dataset": "wnli", + "type": "translation", + "origin_acc": 32.39, + "origin_prompt": "Are the following two sentences entailment or not_entailment? Answer me with \"entailment\" or \"not_entailment\", just one word.", + "atk_acc": 0.0, + "atk_prompt": "Using implication analysis, determine whether the given sentences have a cause-effect relationship and categorize them as 'entailment' or 'not_entailment'.", + "transfer_ori_acc": 76.05633802816901, + "transfer_atk_acc": 74.64788732394366 + }, + { + "dataset": "mmlu", + "type": "translation", + "origin_acc": 10.39, + "origin_prompt": "Answer the following multiple-choice question about {} by selecting the correct option: 'A', 'B', 'C', or 'D'.", + "atk_acc": 4.43, + "atk_prompt": "Choose the correct answer from the options 'A', 'B', 'C', or 'D' for the following questions about {}.", + "transfer_ori_acc": 53.90070921985816, + "transfer_atk_acc": 54.07801418439716 + } +] \ No newline at end of file diff --git a/transfer.py b/transfer.py new file mode 100644 index 0000000000000000000000000000000000000000..ac4f0ae2381330627c944833341c1ea33638b222 --- /dev/null +++ b/transfer.py @@ -0,0 +1,17 @@ +import json +import os +import glob +import numpy as np + + +def retrieve_transfer(source, target, attack, shot): + source = source.replace("/", "-") + target = target.replace("/", "-") + file_dir = "./results_transfer/"+source+"_"+target+"/"+attack+"_"+str(shot)+"_shot.json" + with open(file_dir, 'r', encoding='utf-8') as f: + data = json.load(f) + + return data + + +