"""
CLARE Recipe
=============
(Contextualized Perturbation for Textual Adversarial Attack)
"""
import transformers
from textattack import Attack
from textattack.constraints.pre_transformation import (
    RepeatModification,
    StopwordModification,
)
from textattack.constraints.semantics.sentence_encoders import UniversalSentenceEncoder
from textattack.goal_functions import UntargetedClassification
from textattack.search_methods import GreedySearch
from textattack.transformations import (
    CompositeTransformation,
    WordInsertionMaskedLM,
    WordMergeMaskedLM,
    WordSwapMaskedLM,
)
from .attack_recipe import AttackRecipe


class CLARE2020(AttackRecipe):
    """Li, Zhang, Peng, Chen, Brockett, Sun, Dolan.

    "Contextualized Perturbation for Textual Adversarial Attack" (Li et al., 2020)

    https://arxiv.org/abs/2009.07502

    This method uses greedy search with replace, merge, and insertion transformations
    that leverage a pretrained language model. It also uses a USE similarity constraint.
    """

    @staticmethod
    def build(model_wrapper):
# "This paper presents CLARE, a ContextuaLized AdversaRial Example generation model
# that produces fluent and grammatical outputs through a mask-then-infill procedure.
# CLARE builds on a pre-trained masked language model and modifies the inputs in a context-aware manner.
# We propose three contex-tualized perturbations, Replace, Insert and Merge, allowing for generating outputs of
# varied lengths."
#
# "We experiment with a distilled version of RoBERTa (RoBERTa_{distill}; Sanh et al., 2019)
# as the masked language model for contextualized infilling."
# Because BAE and CLARE both use similar replacement papers, we use BAE's replacement method here.
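        # (With method="bae", WordSwapMaskedLM masks the word being replaced and takes the
        # masked language model's top predictions at that position as candidate substitutions,
        # following the BAE attack of Garg & Ramakrishnan, 2020.)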
        # The paper calls for a masked language model, so DistilRoBERTa is loaded with its
        # masked-LM head (AutoModelForMaskedLM) for the mask-then-infill step.
        shared_masked_lm = transformers.AutoModelForMaskedLM.from_pretrained(
            "distilroberta-base"
        )
        shared_tokenizer = transformers.AutoTokenizer.from_pretrained(
            "distilroberta-base"
        )
        transformation = CompositeTransformation(
            [
                WordSwapMaskedLM(
                    method="bae",
                    masked_language_model=shared_masked_lm,
                    tokenizer=shared_tokenizer,
                    max_candidates=50,
                    min_confidence=5e-4,
                ),
                WordInsertionMaskedLM(
                    masked_language_model=shared_masked_lm,
                    tokenizer=shared_tokenizer,
                    max_candidates=50,
                    min_confidence=0.0,
                ),
                WordMergeMaskedLM(
                    masked_language_model=shared_masked_lm,
                    tokenizer=shared_tokenizer,
                    max_candidates=50,
                    min_confidence=5e-3,
                ),
            ]
        )
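        # Together these cover the paper's three actions: Replace swaps one token for a
        # masked-LM candidate, Insert adds a newly infilled token between two existing
        # ones, and Merge collapses an adjacent bigram into a single infilled token, so
        # outputs can be shorter or longer than the input. Illustrative example (not from
        # the paper): "a classic movie" -> "a classic film" (Replace),
        # "a truly classic movie" (Insert), "a masterpiece" (Merge of "classic movie").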
        #
        # Don't modify the same word twice, and don't modify stopwords.
        #
        constraints = [RepeatModification(), StopwordModification()]
# "A common choice of sim(·,·) is to encode sentences using neural networks,
# and calculate their cosine similarity in the embedding space (Jin et al., 2020)."
# The original implementation uses similarity of 0.7.
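        # compare_against_original=True measures similarity against the original input
        # (rather than the previously perturbed text); window_size=15 restricts the USE
        # comparison to a 15-word window around the modified index, and texts shorter
        # than that window skip the check entirely.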
        use_constraint = UniversalSentenceEncoder(
            threshold=0.7,
            metric="cosine",
            compare_against_original=True,
            window_size=15,
            skip_text_shorter_than_window=True,
        )
        constraints.append(use_constraint)
        # Goal is untargeted classification.
        # "The score is then the negative probability of predicting the gold label from f,
        # using [x_{adv}] as the input."
        goal_function = UntargetedClassification(model_wrapper)
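        # UntargetedClassification is satisfied as soon as the model's predicted label
        # differs from the gold label; until then, the probability-based score above is
        # what ranks candidate perturbations.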
# "To achieve this, we iteratively apply the actions,
# and first select those minimizing the probability of outputting the gold label y from f."
#
# "Only one of the three actions can be applied at each position, and we select the one with the highest score."
#
# "Actions are iteratively applied to the input, until an adversarial example is found or a limit of actions T
# is reached.
# Each step selects the highest-scoring action from the remaining ones."
#
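        # TextAttack's GreedySearch is beam search with beam width 1: each iteration
        # applies every available transformation to the current best candidate, keeps the
        # single highest-scoring result, and stops once the goal function is satisfied.
        # Note that no explicit action limit T is imposed here.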
        search_method = GreedySearch()
        return Attack(goal_function, constraints, transformation, search_method)
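

if __name__ == "__main__":
    # Illustrative usage sketch (not part of the original recipe). The victim model
    # ("textattack/bert-base-uncased-SST-2") and the SST-2 validation split are
    # assumptions chosen only for demonstration; any sequence-classification model
    # behind a TextAttack ModelWrapper works the same way. Because of the relative
    # import above, run this file as a module (python -m <package>.<this_module>).
    import textattack

    model = transformers.AutoModelForSequenceClassification.from_pretrained(
        "textattack/bert-base-uncased-SST-2"
    )
    tokenizer = transformers.AutoTokenizer.from_pretrained(
        "textattack/bert-base-uncased-SST-2"
    )
    model_wrapper = textattack.models.wrappers.HuggingFaceModelWrapper(model, tokenizer)

    # Build the CLARE attack and run it on a handful of examples.
    attack = CLARE2020.build(model_wrapper)
    dataset = textattack.datasets.HuggingFaceDataset("glue", "sst2", split="validation")
    attack_args = textattack.AttackArgs(num_examples=10)
    textattack.Attacker(attack, dataset, attack_args).attack_dataset()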