Spaces:

anonymous8
/

Rapid-Textual-Adversarial-Defense

Running

anonymous8/RPD-Demo

initial commit

4943752 over 1 year ago

No virus

1.73 kB

	"""

	Input Reduction
	====================
	(Pathologies of Neural Models Make Interpretations Difficult)

	"""
	from textattack import Attack
	from textattack.constraints.pre_transformation import (
	RepeatModification,
	StopwordModification,
	)
	from textattack.goal_functions import InputReduction
	from textattack.search_methods import GreedyWordSwapWIR
	from textattack.transformations import WordDeletion

	from .attack_recipe import AttackRecipe


	class InputReductionFeng2018(AttackRecipe):
	    """Input-reduction attack recipe.

	    Feng, Wallace, Grissom, Iyyer, Rodriguez, Boyd-Graber. (2018).

	    Pathologies of Neural Models Make Interpretations Difficult.

	    https://arxiv.org/abs/1804.07781
	    """

	    @staticmethod
	    def build(model_wrapper):
	        """Assemble the input-reduction attack for ``model_wrapper``.

	        Args:
	            model_wrapper: The wrapped victim model; it is handed unchanged
	                to the ``InputReduction`` goal function.

	        Returns:
	            An ``Attack`` combining the ``InputReduction`` goal function,
	            the ``RepeatModification`` and ``StopwordModification``
	            constraints, the ``WordDeletion`` transformation and a
	            ``GreedyWordSwapWIR`` search using the ``"delete"`` ranking.
	        """
	        # At each step, we remove the word with the lowest importance value
	        # until the model changes its prediction.
	        transformation = WordDeletion()

	        constraints = [RepeatModification(), StopwordModification()]
	        #
	        # Goal is input reduction (not untargeted classification): the
	        # ``InputReduction`` goal function is used in maximizable mode.
	        #
	        goal_function = InputReduction(model_wrapper, maximizable=True)
	        #
	        # "For each word in an input sentence, we measure its importance by the
	        # change in the confidence of the original prediction when we remove
	        # that word from the sentence."
	        #
	        # "Instead of looking at the words with high importance values—what
	        # interpretation methods commonly do—we take a complementary approach
	        # and study how the model behaves when the supposedly unimportant words
	        # are removed."
	        #
	        search_method = GreedyWordSwapWIR(wir_method="delete")

	        return Attack(goal_function, constraints, transformation, search_method)