PromptBench / parse.py

import os
import re

import numpy as np


def split_markdown_by_title(markdown_file):
    """Split a results markdown file into {dataset heading -> {attack keyword -> section text}}."""
    with open(markdown_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # Top-level "# <dataset>" headings delimit the per-dataset sections.
    re_str = "# cola|# mnli|# mrpc|# qnli|# qqp|# rte|# sst2|# wnli|# mmlu|# squad_v2|# iwslt|# un_multi|# math"
    datasets = ["# cola", "# mnli", "# mrpc", "# qnli", "# qqp", "# rte", "# sst2", "# wnli",
                "# mmlu", "# squad_v2", "# iwslt", "# un_multi", "# math"]
    # re_str = "# cola|# mnli|# mrpc|# qnli|# qqp|# rte|# sst2|# wnli"
    # datasets = ["# cola", "# mnli", "# mrpc", "# qnli", "# qqp", "# rte", "# sst2", "# wnli"]

    primary_sections = re.split(re_str, content)[1:]
    assert len(primary_sections) == len(datasets)

    all_sections_dict = {}
    for dataset, primary_section in zip(datasets, primary_sections):
        # Second-level "## " headings delimit the per-attack subsections.
        re_str = "## "
        results = re.split(re_str, primary_section)
        keywords = ["10 prompts", "bertattack", "checklist", "deepwordbug", "stresstest",
                    "textfooler", "textbugger", "translation"]
        secondary_sections_dict = {}
        for res in results:
            for keyword in keywords:
                if keyword in res.lower():
                    secondary_sections_dict[keyword] = res
                    break
        all_sections_dict[dataset] = secondary_sections_dict
    return all_sections_dict
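
# Illustrative sketch of the returned structure (exact contents depend on the db file,
# e.g. ./db/t5_zeroshot.md as built in retrieve() below):
#     {"# sst2": {"10 prompts": "...", "bertattack": "...", "translation": "...", ...},
#      "# cola": {...},
#      ...}
# i.e. keyed first by the "# <dataset>" heading and then by the attack keyword.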

# def prompts_understanding(sections_dict):
#     for dataset in sections_dict.keys():
#         # print(dataset)
#         for title in sections_dict[dataset].keys():
#             if title == "10 prompts":
#                 prompts = sections_dict[dataset][title].split("\n")
#                 num = 0
#                 task_prompts_acc = []
#                 role_prompts_acc = []
#                 for prompt in prompts:
#                     if "Acc: " not in prompt:
#                         continue
#                     else:
#                         import re
#                         num += 1
#                         match = re.search(r'Acc: (\d+\.\d+)%', prompt)
#                         if match:
#                             number = float(match.group(1))
#                             if num <= 10:
#                                 task_prompts_acc.append(number)
#                             else:
#                                 role_prompts_acc.append(number)
#                 print(task_prompts_acc)
#                 print(role_prompts_acc)


def list_files(directory):
    # Return full paths of the regular files (not subdirectories) directly under `directory`.
    files = [os.path.join(directory, d) for d in os.listdir(directory) if not os.path.isdir(os.path.join(directory, d))]
    return files


def convert_model_name(model):
    # Map a display model name to the prefix used in the ./db markdown filenames.
    model_names = {
        "T5": "t5",
        "UL2": "ul2",
        "Vicuna": "vicuna",
        "ChatGPT": "chatgpt",
    }
    return model_names[model]


def convert_attack_name(attack):
    # Map a display attack name to the lowercase keyword used for its markdown subsection.
    attack_name = {
        "BertAttack": "bertattack",
        "CheckList": "checklist",
        "DeepWordBug": "deepwordbug",
        "StressTest": "stresstest",
        "TextFooler": "textfooler",
        "TextBugger": "textbugger",
        "Semantic": "translation",
    }
    return attack_name[attack]


def convert_dataset_name(dataset):
    dataset_name = {
        "CoLA": "# cola",
        "MNLI": "# mnli",
        "MRPC": "# mrpc",
        "QNLI": "# qnli",
        "QQP": "# qqp",
        "RTE": "# rte",
        "SST-2": "# sst2",
        "WNLI": "# wnli",
        "MMLU": "# mmlu",
        "SQuAD V2": "# squad_v2",
        "IWSLT": "# iwslt",
        "UN Multi": "# un_multi",
        "Math": "# math",
        "Avg": "Avg",
    }
    return dataset_name[dataset]


def retrieve(model_name, dataset_name, attack_name, prompt_type):
    model_name = convert_model_name(model_name)
    dataset_name = convert_dataset_name(dataset_name)
    attack_name = convert_attack_name(attack_name)

    if "zero" in prompt_type:
        shot = "zeroshot"
    else:
        shot = "fewshot"
    if "task" in prompt_type:
        prompt_type = "task"
    else:
        prompt_type = "role"

    directory_path = "./db"
    md_dir = os.path.join(directory_path, model_name + "_" + shot + ".md")
    sections_dict = split_markdown_by_title(md_dir)
    # Pull out the per-attack sections for the requested dataset.
    dataset_dict = {}
    for cur_dataset in sections_dict.keys():
        if cur_dataset == dataset_name:
            dataset_dict = sections_dict[cur_dataset]

    # Relative performance drops for the requested attack, per dataset plus an "Avg" bucket.
    summary = {attack_name: {dataset_name: [], "Avg": []}}

    if attack_name == "translation":
        # Semantic (translation) attacks report one "acc: xx.x%" line per adversarial
        # prompt; keep the six lowest accuracies.
        results = dataset_dict[attack_name].split("\n")
        atk_acc = []
        for result in results:
            if "acc: " not in result:
                continue
            match_atk = re.search(r'acc: (\d+\.\d+)%', result)
            if match_atk:
                atk_acc.append(float(match_atk.group(1)))
        sorted_atk_acc = sorted(atk_acc)[:6]
    elif attack_name in ["bertattack", "checklist", "deepwordbug", "stresstest", "textfooler", "textbugger"]:
        # Each adversarial-attack section pairs an original prompt with its attacked
        # version; record the relative accuracy drop for every pair.
        results = dataset_dict[attack_name].split("Original prompt: ")
        num = 0
        for result in results:
            if "Attacked prompt: " not in result:
                continue
            num += 1
            match_origin = re.search(r'Original acc: (\d+\.\d+)%', result)
            match_atk = re.search(r'attacked acc: (\d+\.\d+)%', result)
            if match_origin and match_atk:
                number_origin = float(match_origin.group(1))
                number_atk = float(match_atk.group(1))
                summary[attack_name][dataset_name].append((number_origin - number_atk) / number_origin)
                summary[attack_name]["Avg"].append((number_origin - number_atk) / number_origin)
        # print(shot, dataset_name, attack_name, len(summary[attack_name][dataset_name]), num)

    # for atk in summary.keys():
    #     for dataset in summary[atk].keys():
    #         # if atk == "translation":
    #         print(atk, dataset, len(summary[atk][dataset]))
    #         # print(summary[atk][dataset][:10])
    # Format each drop-rate list as a "mean ± std" LaTeX cell, one row per attack and
    # one column per dataset (plus an "Avg" column).
    output_dict = {}
    sorted_atk_name = ["TextBugger", "DeepWordBug", "TextFooler", "BertAttack", "CheckList", "StressTest", "Semantic"]
    sorted_dataset_name = ["SST-2", "CoLA", "QQP", "MRPC", "MNLI", "QNLI", "RTE", "WNLI", "MMLU", "SQuAD V2", "IWSLT", "UN Multi", "Math"]
    for atk in sorted_atk_name:
        output_dict[atk] = {}
        for dataset in sorted_dataset_name:
            output_dict[atk][dataset] = ""

    for sorted_atk in sorted_atk_name:
        for attack, dataset_drop_rates in summary.items():
            # summary is keyed by the converted (lowercase) names, so convert the
            # display names before comparing.
            if attack == convert_attack_name(sorted_atk):
                for sorted_dataset in sorted_dataset_name:
                    for dataset, drop_rates in dataset_drop_rates.items():
                        if dataset == convert_dataset_name(sorted_dataset):
                            if len(drop_rates) > 0:
                                output_dict[sorted_atk][sorted_dataset] = "{:.2f}".format(sum(drop_rates) / len(drop_rates)) + "\\scriptsize{$\\pm$" + "{:.2f}".format(np.std(drop_rates)) + "}"
                            else:
                                output_dict[sorted_atk][sorted_dataset] = "-"
                total_drop_rate = summary[attack]["Avg"]
                if len(total_drop_rate) > 0:
                    output_dict[sorted_atk]["Avg"] = "{:.2f}".format(np.mean(total_drop_rate)) + "\\scriptsize{$\\pm$" + "{:.2f}".format(np.std(total_drop_rate)) + "}"
                else:
                    output_dict[sorted_atk]["Avg"] = "-"

    return output_dict