|
""" |
|
|
|
HotFlip
===========

(HotFlip: White-Box Adversarial Examples for Text Classification)
|
|
|
""" |
|
from textattack import Attack |
|
from textattack.constraints.grammaticality import PartOfSpeech |
|
from textattack.constraints.overlap import MaxWordsPerturbed |
|
from textattack.constraints.pre_transformation import ( |
|
RepeatModification, |
|
StopwordModification, |
|
) |
|
from textattack.constraints.semantics import WordEmbeddingDistance |
|
from textattack.goal_functions import UntargetedClassification |
|
from textattack.search_methods import BeamSearch |
|
from textattack.transformations import WordSwapGradientBased |
|
|
|
from .attack_recipe import AttackRecipe |
|
|
|
|
|
class HotFlipEbrahimi2017(AttackRecipe):
    """Attack recipe after Ebrahimi, J. et al. (2017).

    HotFlip: White-Box Adversarial Examples for Text Classification

    https://arxiv.org/abs/1712.06751

    This is a reproduction of the HotFlip word-level attack (section 5 of the
    paper).
    """

    @staticmethod
    def build(model_wrapper):
        """Assemble the HotFlip word-level attack for ``model_wrapper``.

        The attack is composed of:

        * a ``WordSwapGradientBased`` transformation with ``top_n=1``;
        * the constraints ``RepeatModification``, ``StopwordModification``,
          ``MaxWordsPerturbed(max_num_words=2)``,
          ``WordEmbeddingDistance(min_cos_sim=0.8)`` and ``PartOfSpeech``,
          in that order;
        * an ``UntargetedClassification`` goal function;
        * a ``BeamSearch`` with ``beam_width=10``.

        Args:
            model_wrapper: The wrapped victim model. It is handed to both the
                gradient-based transformation and the goal function.

        Returns:
            The ``Attack`` built from the components listed above.
        """
        # Components are constructed in the same order as the recipe has
        # always used: transformation, constraints, goal function, search.
        gradient_swap = WordSwapGradientBased(model_wrapper, top_n=1)

        # NOTE(review): the numeric settings (two perturbed words, 0.8 cosine
        # similarity, beam width 10) presumably mirror the paper's setup;
        # confirm against section 5 before changing them.
        recipe_constraints = [
            RepeatModification(),
            StopwordModification(),
            MaxWordsPerturbed(max_num_words=2),
            WordEmbeddingDistance(min_cos_sim=0.8),
            PartOfSpeech(),
        ]

        objective = UntargetedClassification(model_wrapper)

        return Attack(
            objective,
            recipe_constraints,
            gradient_swap,
            BeamSearch(beam_width=10),
        )
|
|