Spaces:

March07
/

PromptBench

Runtime error

File size: 8,724 Bytes

683d0f6

[
    {
        "dataset": "cola",
        "type": "task",
        "origin_acc": 69.0,
        "origin_prompt": "Assess the following sentence and determine if it is grammatically correct. Respond with 'Acceptable' or 'Unacceptable':",
        "atk_acc": 0.0,
        "atk_prompt": "Evaluating the later sen𝚝ence and determine if it is grammatically correct. Respoոd with 'Acceptable' or 'Unacceptable':",
        "transfer_ori_acc": 78.0,
        "transfer_atk_acc": 76.1
    },
    {
        "dataset": "cola",
        "type": "role",
        "origin_acc": 54.9,
        "origin_prompt": "As a tool for determining grammatical correctness, review the sentence and categorize its grammar as either 'acceptable' or 'unacceptable':",
        "atk_acc": 0.0,
        "atk_prompt": "Aѕ a tool for determining grammatical regularity, reviewing the penalties and categorizе its grammar as ei𝚝her 'acceptable' or 'unacceptable':",
        "transfer_ori_acc": 74.9,
        "transfer_atk_acc": 55.50000000000001
    },
    {
        "dataset": "mrpc",
        "type": "task",
        "origin_acc": 58.09,
        "origin_prompt": "Compare the following sentences and determine if they share the same semantic meaning by responding with 'equivalent' or 'not_equivalent'.",
        "atk_acc": 0.49,
        "atk_prompt": "Comрare the later sentences and determine if they share the same semantic meaning by respoոding with 'equivalent' or 'not_equivalent'. ",
        "transfer_ori_acc": 82.1078431372549,
        "transfer_atk_acc": 81.61764705882352
    },
    {
        "dataset": "mrpc",
        "type": "role",
        "origin_acc": 53.43,
        "origin_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'.",
        "atk_acc": 0.0,
        "atk_prompt": "Iո the rloe of a sentence comparison analyst, asess the provided sentences and indicate if they convey the same meaning by selec𝚝ing 'equivalent' or 'not_equivalent'. ",
        "transfer_ori_acc": 81.12745098039215,
        "transfer_atk_acc": 83.08823529411765
    },
    {
        "dataset": "qnli",
        "type": "task",
        "origin_acc": 30.5,
        "origin_prompt": "Review the given context and question, and decide if the context contains enough information to support the answer by selecting 'entailment' or 'not_entailment'.",
        "atk_acc": 0.0,
        "atk_prompt": "Revisited the afforded contеxt and question, and decide if the context contains enough information to support the answer by selec𝚝ing 'entailment' or 'not_entailment'. ",
        "transfer_ori_acc": 85.2,
        "transfer_atk_acc": 85.39999999999999
    },
    {
        "dataset": "qnli",
        "type": "role",
        "origin_acc": 38.4,
        "origin_prompt": "As a textual inference expert, analyze if the answer to the question can be deduced from the provided context and select 'entailment' or 'not_entailment'.",
        "atk_acc": 0.1,
        "atk_prompt": "Aѕ a textual deduced expeⲅt, analyzes if the replying to the question can be deduced from the provided context and selec𝚝 'entailment' or 'not_entailment'. ",
        "transfer_ori_acc": 78.10000000000001,
        "transfer_atk_acc": 83.89999999999999
    },
    {
        "dataset": "qqp",
        "type": "task",
        "origin_acc": 23.9,
        "origin_prompt": "Can these two statements be considered equal in meaning? Answer with 'equivalent' or 'not_equivalent'.",
        "atk_acc": 0.2,
        "atk_prompt": "Possible these two statemen𝚝s be considered equal in meaing? Answer with 'equivalent' or 'not_equivalent'. ",
        "transfer_ori_acc": 87.8,
        "transfer_atk_acc": 87.8
    },
    {
        "dataset": "qqp",
        "type": "role",
        "origin_acc": 16.9,
        "origin_prompt": "In the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different questions.",
        "atk_acc": 0.0,
        "atk_prompt": "Iո the role of a question similarity analyzer, examine the meaning of the provided questions and decide if they are equivalent, answering with 'equivalent' for equivalent questions or 'not_equivalent' for different ques𝚝ions. ",
        "transfer_ori_acc": 87.1,
        "transfer_atk_acc": 87.2
    },
    {
        "dataset": "rte",
        "type": "task",
        "origin_acc": 45.13,
        "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.",
        "atk_acc": 1.08,
        "atk_prompt": "Ai the relationship between the given sentence represent entailment or not_entailment? Answer with 'entailment' or 'not_entailment'.",
        "transfer_ori_acc": 85.1985559566787,
        "transfer_atk_acc": 84.47653429602889
    },
    {
        "dataset": "rte",
        "type": "role",
        "origin_acc": 51.26,
        "origin_prompt": "In the role of a semantic relationship analyzer, examine the connection between the given sentences and decide if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':",
        "atk_acc": 0.36,
        "atk_prompt": "Onto the role of a semantic relationship offing, e×amine the connection between the given seոtences and decіde if they exhibit entailment or not_entailment. Answer with 'entailment' or 'not_entailment':",
        "transfer_ori_acc": 82.31046931407943,
        "transfer_atk_acc": 83.03249097472924
    },
    {
        "dataset": "sst2",
        "type": "task",
        "origin_acc": 48.05,
        "origin_prompt": "After examining the following expression, label its emotion as either 'positive' or 'negative':",
        "atk_acc": 2.06,
        "atk_prompt": "After examining the following expression, label its emo𝚝ion as neither 'positive' or 'negative': ",
        "transfer_ori_acc": 93.23394495412845,
        "transfer_atk_acc": 93.46330275229357
    },
    {
        "dataset": "sst2",
        "type": "role",
        "origin_acc": 37.5,
        "origin_prompt": "As an emotion detector, determine if the provided passage conveys a 'positive' or 'negative' sentiment. Classify:",
        "atk_acc": 1.03,
        "atk_prompt": "Because an emotion detector, determine if the povided passgae convey a 'positive' or 'negative' ambience. Classi𝚏y: ",
        "transfer_ori_acc": 94.83944954128441,
        "transfer_atk_acc": 91.85779816513761
    },
    {
        "dataset": "wnli",
        "type": "task",
        "origin_acc": 43.66,
        "origin_prompt": "Does the relationship between the given sentences represent entailment or not_entailment? Respond with 'entailment' or 'not_entailment'.",
        "atk_acc": 1.41,
        "atk_prompt": "Deos the relationship between the given sentences represent entailment or not_entailment? Answering with 'entailment' or 'not_entailment'.",
        "transfer_ori_acc": 61.97183098591549,
        "transfer_atk_acc": 59.154929577464785
    },
    {
        "dataset": "wnli",
        "type": "role",
        "origin_acc": 43.66,
        "origin_prompt": "In your role as an entailment analysis tool, assess the relationship between the given sentences and classify it as 'entailment' or 'not_entailment':",
        "atk_acc": 0.0,
        "atk_prompt": "During your roⅼe as an entailment analysis tool, evaluating the relationship between the given sentences and classifying it as 'entailment' or 'not_entailment':",
        "transfer_ori_acc": 56.33802816901409,
        "transfer_atk_acc": 57.74647887323944
    },
    {
        "dataset": "mmlu",
        "type": "task",
        "origin_acc": 25.67,
        "origin_prompt": "Evaluate the multiple-choice question about {} and select the most fitting response from 'A', 'B', 'C', or 'D'.",
        "atk_acc": 9.92,
        "atk_prompt": "Assess the mul𝚝iple-choice question about {} and seⅼect the most fitting response from 'A', 'B', 'C', or 'D'. ",
        "transfer_ori_acc": 44.50354609929078,
        "transfer_atk_acc": 44.50354609929078
    },
    {
        "dataset": "mmlu",
        "type": "role",
        "origin_acc": 24.39,
        "origin_prompt": "With your knowledge of {}, tackle the following multiple-choice question by choosing 'A', 'B', 'C', or 'D'.",
        "atk_acc": 9.57,
        "atk_prompt": "With your knowledge of {}, tackle the 𝚏ollowing multiple-choіce problem by choosing 'A', 'B', 'C', or 'D'.",
        "transfer_ori_acc": 43.97163120567376,
        "transfer_atk_acc": 45.744680851063826
    }
]