|
""" |
|
CLARE Recipe |
|
============= |
|
|
|
(Contextualized Perturbation for Textual Adversarial Attack) |
|
|
|
""" |
|
|
|
import transformers |
|
|
|
from textattack import Attack |
|
from textattack.constraints.pre_transformation import ( |
|
RepeatModification, |
|
StopwordModification, |
|
) |
|
from textattack.constraints.semantics.sentence_encoders import UniversalSentenceEncoder |
|
from textattack.goal_functions import UntargetedClassification |
|
from textattack.search_methods import GreedySearch |
|
from textattack.transformations import ( |
|
CompositeTransformation, |
|
WordInsertionMaskedLM, |
|
WordMergeMaskedLM, |
|
WordSwapMaskedLM, |
|
) |
|
|
|
from .attack_recipe import AttackRecipe |
|
|
|
|
|
class CLARE2020(AttackRecipe):
    """Li, Zhang, Peng, Chen, Brockett, Sun, Dolan.

    "Contextualized Perturbation for Textual Adversarial Attack" (Li et al., 2020)

    https://arxiv.org/abs/2009.07502

    This method uses greedy search with replace, merge, and insertion
    transformations that leverage a pretrained language model. It also uses
    a USE similarity constraint.
    """

    @staticmethod
    def build(model_wrapper):
        """Assemble the CLARE attack for ``model_wrapper``.

        Args:
            model_wrapper: The victim model wrapper; it is handed unchanged
                to ``UntargetedClassification``.

        Returns:
            An ``Attack`` combining an untargeted-classification goal, the
            repeat/stopword/USE constraints, a composite of the three
            masked-LM transformations, and greedy search.
        """
        # One "distilroberta-base" model and tokenizer are loaded once and
        # shared by all three transformations.
        #
        # The model is loaded through the masked-LM auto class because every
        # transformation below receives it as ``masked_language_model`` and
        # asks it to fill mask positions; the causal-LM auto class selects a
        # different head class for this checkpoint.
        shared_masked_lm = transformers.AutoModelForMaskedLM.from_pretrained(
            "distilroberta-base"
        )
        shared_tokenizer = transformers.AutoTokenizer.from_pretrained(
            "distilroberta-base"
        )

        # Replace, insert, and merge candidates are pooled into a single
        # transformation. Each sub-transformation keeps its own
        # ``min_confidence`` value.
        transformation = CompositeTransformation(
            [
                WordSwapMaskedLM(
                    method="bae",
                    masked_language_model=shared_masked_lm,
                    tokenizer=shared_tokenizer,
                    max_candidates=50,
                    min_confidence=5e-4,
                ),
                WordInsertionMaskedLM(
                    masked_language_model=shared_masked_lm,
                    tokenizer=shared_tokenizer,
                    max_candidates=50,
                    min_confidence=0.0,
                ),
                WordMergeMaskedLM(
                    masked_language_model=shared_masked_lm,
                    tokenizer=shared_tokenizer,
                    max_candidates=50,
                    min_confidence=5e-3,
                ),
            ]
        )

        # Pre-transformation constraints.
        constraints = [RepeatModification(), StopwordModification()]

        # Universal Sentence Encoder similarity constraint: cosine metric,
        # threshold 0.7, compared against the original text with a window
        # size of 15.
        use_constraint = UniversalSentenceEncoder(
            threshold=0.7,
            metric="cosine",
            compare_against_original=True,
            window_size=15,
            skip_text_shorter_than_window=True,
        )
        constraints.append(use_constraint)

        # Goal: untargeted classification against the wrapped model.
        goal_function = UntargetedClassification(model_wrapper)

        # Search strategy: greedy search.
        search_method = GreedySearch()

        return Attack(goal_function, constraints, transformation, search_method)
|
|