File size: 4,105 Bytes
4943752 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
"""
TextBugger
===============
(TextBugger: Generating Adversarial Text Against Real-world Applications)
"""
from textattack import Attack
from textattack.constraints.pre_transformation import (
RepeatModification,
StopwordModification,
)
from textattack.constraints.semantics.sentence_encoders import UniversalSentenceEncoder
from textattack.goal_functions import UntargetedClassification
from textattack.search_methods import GreedyWordSwapWIR
from textattack.transformations import (
CompositeTransformation,
WordSwapEmbedding,
WordSwapHomoglyphSwap,
WordSwapNeighboringCharacterSwap,
WordSwapRandomCharacterDeletion,
WordSwapRandomCharacterInsertion,
)
from .attack_recipe import AttackRecipe
class TextBuggerLi2018(AttackRecipe):
    """Attack recipe following the TextBugger paper.

    Li, J., Ji, S., Du, T., Li, B., and Wang, T. (2018).

    TextBugger: Generating Adversarial Text Against Real-world Applications.

    https://arxiv.org/abs/1812.05271
    """

    @staticmethod
    def build(model_wrapper):
        """Assemble the TextBugger attack for ``model_wrapper``.

        Args:
            model_wrapper: The wrapped victim model; it is handed unchanged to
                ``UntargetedClassification``.

        Returns:
            An ``Attack`` built from the goal function, constraints,
            transformation and search method configured below.
        """
        # The three character-level edits below share these settings: perturb
        # a single random position and leave the first and last characters of
        # the word untouched.
        inner_char_edit = dict(
            random_one=True,
            skip_first_char=True,
            skip_last_char=True,
        )

        # The paper proposes five "bug" generation methods; they are combined
        # into a single composite transformation, in the paper's order.
        bug_generators = [
            # (1) Insert: put a space inside the word. English words are
            #     generally segmented by spaces, so an inserted space can
            #     deceive a classifier.
            WordSwapRandomCharacterInsertion(
                letters_to_insert=" ", **inner_char_edit
            ),
            # (2) Delete: remove a random character of the word other than
            #     the first and the last one.
            WordSwapRandomCharacterDeletion(**inner_char_edit),
            # (3) Swap: exchange two random adjacent letters without altering
            #     the first or last letter -- a common fast-typing mistake.
            WordSwapNeighboringCharacterSwap(**inner_char_edit),
            # (4) Substitute-C (Sub-C): replace characters with visually
            #     similar ones (e.g. 'o' -> '0', 'l' -> '1', 'a' -> '@').
            #     The paper also mentions keyboard-adjacent characters
            #     (e.g. 'm' -> 'n'); only a homoglyph swap is configured here.
            WordSwapHomoglyphSwap(),
            # (5) Substitute-W (Sub-W): replace a word with one of its top-k
            #     nearest neighbours in a word-vector space. The paper uses
            #     the pre-trained Stanford GloVe model [30] with topk = 5;
            #     here the candidate count is capped at 5.
            # NOTE(review): which embedding WordSwapEmbedding uses by default
            # is not visible from this file -- confirm it matches the paper.
            WordSwapEmbedding(max_candidates=5),
        ]
        transformation = CompositeTransformation(bug_generators)

        # Pre-transformation constraints (RepeatModification,
        # StopwordModification) come first, followed by the semantic
        # constraint. Per the paper, original and adversarial texts are
        # encoded with the Universal Sentence Encoder [7] and compared by
        # cosine similarity: "the semantic similarity threshold \eps is set
        # as 0.8 to guarantee a good trade-off between quality and strength
        # of the generated adversarial text."
        constraints = [
            RepeatModification(),
            StopwordModification(),
            UniversalSentenceEncoder(threshold=0.8),
        ]

        # Goal: untargeted classification on the wrapped model.
        goal_function = UntargetedClassification(model_wrapper)

        # Search: greedy word swap guided by "Word Importance Ranking",
        # using the "delete" ranking method.
        search_method = GreedyWordSwapWIR(wir_method="delete")

        return Attack(goal_function, constraints, transformation, search_method)
|