import os
import re

import numpy as np


def split_markdown_by_title(markdown_file):
    """Split a results markdown file into {dataset header: {section keyword: section text}}."""
    with open(markdown_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # Top-level "# <dataset>" headers delimit the primary sections.
    re_str = "# cola|# mnli|# mrpc|# qnli|# qqp|# rte|# sst2|# wnli|# mmlu|# squad_v2|# iwslt|# un_multi|# math"
    datasets = ["# cola", "# mnli", "# mrpc", "# qnli", "# qqp", "# rte", "# sst2", "# wnli",
                "# mmlu", "# squad_v2", "# iwslt", "# un_multi", "# math"]
    # re_str = "# cola|# mnli|# mrpc|# qnli|# qqp|# rte|# sst2|# wnli"
    # datasets = ["# cola", "# mnli", "# mrpc", "# qnli", "# qqp", "# rte", "# sst2", "# wnli"]

    primary_sections = re.split(re_str, content)[1:]
    assert len(primary_sections) == len(datasets)

    all_sections_dict = {}
    for dataset, primary_section in zip(datasets, primary_sections):
        # Second-level "## <attack>" headers delimit the secondary sections.
        results = re.split("## ", primary_section)
        keywords = ["10 prompts", "bertattack", "checklist", "deepwordbug",
                    "stresstest", "textfooler", "textbugger", "translation"]
        secondary_sections_dict = {}
        for res in results:
            for keyword in keywords:
                if keyword in res.lower():
                    secondary_sections_dict[keyword] = res
                    break
        all_sections_dict[dataset] = secondary_sections_dict
    return all_sections_dict


# def prompts_understanding(sections_dict):
#     for dataset in sections_dict.keys():
#         # print(dataset)
#         for title in sections_dict[dataset].keys():
#             if title == "10 prompts":
#                 prompts = sections_dict[dataset][title].split("\n")
#                 num = 0
#                 task_prompts_acc = []
#                 role_prompts_acc = []
#                 for prompt in prompts:
#                     if "Acc: " not in prompt:
#                         continue
#                     num += 1
#                     match = re.search(r'Acc: (\d+\.\d+)%', prompt)
#                     if match:
#                         number = float(match.group(1))
#                         # The first 10 matches are task prompts, the rest role prompts.
#                         if num <= 10:
#                             task_prompts_acc.append(number)
#                         else:
#                             role_prompts_acc.append(number)
#                 print(task_prompts_acc)
#                 print(role_prompts_acc)


def list_files(directory):
    """Return the files (not subdirectories) directly under `directory`."""
    return [os.path.join(directory, d) for d in os.listdir(directory)
            if not os.path.isdir(os.path.join(directory, d))]


def convert_model_name(model):
    # Display name -> file-name prefix of the markdown files in ./db.
    model_name = {
        "T5": "t5",
        "UL2": "ul2",
        "Vicuna": "vicuna",
        "ChatGPT": "chatgpt",
    }
    return model_name[model]


def convert_attack_name(attack):
    # Display name -> "## <attack>" section keyword.
    attack_name = {
        "BertAttack": "bertattack",
        "CheckList": "checklist",
        "DeepWordBug": "deepwordbug",
        "StressTest": "stresstest",
        "TextFooler": "textfooler",
        "TextBugger": "textbugger",
        "Semantic": "translation",
    }
    return attack_name[attack]


def convert_dataset_name(dataset):
    # Display name -> "# <dataset>" section header.
    dataset_name = {
        "CoLA": "# cola",
        "MNLI": "# mnli",
        "MRPC": "# mrpc",
        "QNLI": "# qnli",
        "QQP": "# qqp",
        "RTE": "# rte",
        "SST-2": "# sst2",
        "WNLI": "# wnli",
        "MMLU": "# mmlu",
        "SQuAD V2": "# squad_v2",
        "IWSLT": "# iwslt",
        "UN Multi": "# un_multi",
        "Math": "# math",
        "Avg": "Avg",
    }
    return dataset_name[dataset]
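# For reference, the parser and name maps above assume result files shaped
# roughly like the sketch below. The headers and field names come from the
# regexes in this file; the concrete numbers are invented for illustration:
#
#   # sst2
#   ## 10 prompts
#   Acc: 92.30%  prompt: ...
#   ## bertattack
#   Original prompt: ...
#   Original acc: 91.50%
#   Attacked prompt: ...
#   attacked acc: 84.20%
#   ## translation
#   acc: 90.10%  prompt: ...
#   acc: 88.70%  prompt: ...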
def retrieve(model_name, dataset_name, attack_name, prompt_type):
    """Look up one (model, dataset, attack) result from the markdown files in ./db."""
    model_name = convert_model_name(model_name)
    dataset_name = convert_dataset_name(dataset_name)
    attack_name = convert_attack_name(attack_name)

    shot = "zeroshot" if "zero" in prompt_type else "fewshot"
    prompt_type = "task" if "task" in prompt_type else "role"

    directory_path = "./db"
    md_dir = os.path.join(directory_path, model_name + "_" + shot + ".md")
    sections_dict = split_markdown_by_title(md_dir)
    dataset_dict = sections_dict[dataset_name]

    if attack_name == "translation":
        # Semantic (translation) sections report one "acc: X%" line per prompt;
        # keep the six lowest accuracies.
        atk_acc = []
        for result in dataset_dict[attack_name].split("\n"):
            if "acc: " not in result:
                continue
            match_atk = re.search(r'acc: (\d+\.\d+)%', result)
            if match_atk:
                atk_acc.append(float(match_atk.group(1)))
        return sorted(atk_acc)[:6]
    elif attack_name in ["bertattack", "checklist", "deepwordbug",
                         "stresstest", "textfooler", "textbugger"]:
        # Each entry pairs an original prompt/accuracy with an attacked one;
        # collect the relative performance drop rates.
        drop_rates = []
        for result in dataset_dict[attack_name].split("Original prompt: "):
            if "Attacked prompt: " not in result:
                continue
            match_origin = re.search(r'Original acc: (\d+\.\d+)%', result)
            match_atk = re.search(r'attacked acc: (\d+\.\d+)%', result)
            if match_origin and match_atk:
                number_origin = float(match_origin.group(1))
                number_atk = float(match_atk.group(1))
                drop_rates.append((number_origin - number_atk) / number_origin)
        return drop_rates


def build_output_table(summary):
    """Format `summary` into LaTeX "mean (small +/- std)" table cells.

    `summary` maps attack keywords ("bertattack", ..., "translation") to
    {dataset header: [drop rates]} dicts, each with an extra "Avg" list.
    """
    # Sanity check of the accumulated drop rates:
    # for atk in summary.keys():
    #     for dataset in summary[atk].keys():
    #         print(atk, dataset, len(summary[atk][dataset]))
    #         # print(summary[atk][dataset][:10])
    output_dict = {}
    sorted_atk_name = ["TextBugger", "DeepWordBug", "TextFooler", "BertAttack",
                       "CheckList", "StressTest", "Semantic"]
    sorted_dataset_name = ["SST-2", "CoLA", "QQP", "MRPC", "MNLI", "QNLI", "RTE", "WNLI",
                           "MMLU", "SQuAD V2", "IWSLT", "UN Multi", "Math"]
    for atk in sorted_atk_name:
        output_dict[atk] = {}
        for dataset in sorted_dataset_name:
            output_dict[atk][dataset] = ""

    for sorted_atk in sorted_atk_name:
        for attack, dataset_drop_rates in summary.items():
            # `summary` is keyed by section keywords ("bertattack", "# cola", ...),
            # so convert the display names before comparing.
            if attack == convert_attack_name(sorted_atk):
                for sorted_dataset in sorted_dataset_name:
                    for dataset, drop_rates in dataset_drop_rates.items():
                        if dataset == convert_dataset_name(sorted_dataset):
                            if len(drop_rates) > 0:
                                output_dict[sorted_atk][sorted_dataset] = (
                                    "{:.2f}".format(sum(drop_rates) / len(drop_rates))
                                    + r"\scriptsize{$\pm$"
                                    + "{:.2f}".format(np.std(drop_rates)) + "}")
                            else:
                                output_dict[sorted_atk][sorted_dataset] = "-"
                total_drop_rate = summary[attack]["Avg"]
                output_dict[sorted_atk]["Avg"] = (
                    "{:.2f}".format(np.mean(total_drop_rate))
                    + r"\scriptsize{$\pm$"
                    + "{:.2f}".format(np.std(total_drop_rate)) + "}")
    return output_dict
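# The flattened original referenced a `summary` accumulator without showing how
# it is built. The sketch below is a minimal reconstruction under the structure
# build_output_table() expects: {attack keyword: {dataset header: [drop rates],
# ..., "Avg": [...]}}. The helper name `build_summary` and its `md_file`
# argument are assumptions, not part of the original script; translation
# ("Semantic") sections report raw accuracies rather than original/attacked
# pairs, so they are skipped here.
def build_summary(md_file):
    sections_dict = split_markdown_by_title(md_file)
    attacks = ["bertattack", "checklist", "deepwordbug",
               "stresstest", "textfooler", "textbugger"]
    summary = {atk: {ds: [] for ds in list(sections_dict) + ["Avg"]}
               for atk in attacks}
    for dataset, dataset_dict in sections_dict.items():
        for title, section in dataset_dict.items():
            if title not in attacks:
                continue
            # Same original/attacked parsing as in retrieve() above.
            for result in section.split("Original prompt: "):
                if "Attacked prompt: " not in result:
                    continue
                match_origin = re.search(r'Original acc: (\d+\.\d+)%', result)
                match_atk = re.search(r'attacked acc: (\d+\.\d+)%', result)
                if match_origin and match_atk:
                    origin = float(match_origin.group(1))
                    attacked = float(match_atk.group(1))
                    summary[title][dataset].append((origin - attacked) / origin)
                    summary[title]["Avg"].append((origin - attacked) / origin)
    return summary


# Hypothetical usage (the file name is an assumption; use whatever exists in ./db):
# if __name__ == "__main__":
#     summary = build_summary("./db/chatgpt_fewshot.md")
#     table = build_output_table(summary)
#     print(table["TextBugger"]["SST-2"])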