|
""" |
|
Kuleshov2017 |
|
============== |
|
(Adversarial Examples for Natural Language Classification Problems) |
|
|
|
""" |
|
from textattack import Attack |
|
from textattack.constraints.grammaticality.language_models import GPT2 |
|
from textattack.constraints.overlap import MaxWordsPerturbed |
|
from textattack.constraints.pre_transformation import ( |
|
RepeatModification, |
|
StopwordModification, |
|
) |
|
from textattack.constraints.semantics.sentence_encoders import ThoughtVector |
|
from textattack.goal_functions import UntargetedClassification |
|
from textattack.search_methods import GreedySearch |
|
from textattack.transformations import WordSwapEmbedding |
|
|
|
from .attack_recipe import AttackRecipe |
|
|
|
|
|
class Kuleshov2017(AttackRecipe):
    """Kuleshov, V. et al.

    Adversarial Examples for Natural Language Classification Problems.

    https://openreview.net/pdf?id=r1QZ3zbAZ.
    """

    @staticmethod
    def build(model_wrapper):
        """Assemble the attack for the given model wrapper.

        Args:
            model_wrapper: Handed unchanged to ``UntargetedClassification``
                as the model under attack.

        Returns:
            An ``Attack`` built from the goal function, the constraint list,
            the word-swap transformation and the greedy search method.
        """
        # NOTE(review): the numeric settings below (15, 0.5, 0.2, 2.0, 0.7)
        # are presumably the hyperparameters reported in the referenced
        # paper -- confirm against the paper before changing any of them.

        # Embedding-based word swap, configured with max_candidates=15.
        word_swap = WordSwapEmbedding(max_candidates=15)

        # Constraints are instantiated in the order listed here:
        #   1-2. pre-transformation constraints (repeat / stopword modification),
        #   3.   overlap constraint with max_percent=0.5,
        #   4.   sentence-encoder constraint (threshold 0.2, "max_euclidean"),
        #   5.   GPT-2 language-model constraint with max_log_prob_diff=2.0.
        limits = [
            RepeatModification(),
            StopwordModification(),
            MaxWordsPerturbed(max_percent=0.5),
            ThoughtVector(threshold=0.2, metric="max_euclidean"),
            GPT2(max_log_prob_diff=2.0),
        ]

        # Untargeted classification goal configured with target_max_score=0.7.
        objective = UntargetedClassification(model_wrapper, target_max_score=0.7)

        # Candidate perturbations are explored with a greedy search.
        searcher = GreedySearch()

        return Attack(objective, limits, word_swap, searcher)
|
|