""" Determine for if an attack has been successful in Classification --------------------------------------------------------------------- """ import numpy as np import torch from textattack.goal_function_results import ClassificationGoalFunctionResult from textattack.goal_functions import GoalFunction class ClassificationGoalFunction(GoalFunction): """A goal function defined on a model that outputs a probability for some number of classes.""" def _process_model_outputs(self, inputs, scores): """Processes and validates a list of model outputs. This is a task-dependent operation. For example, classification outputs need to have a softmax applied. """ # Automatically cast a list or ndarray of predictions to a tensor. if isinstance(scores, list) or isinstance(scores, np.ndarray): scores = torch.tensor(scores) # Ensure the returned value is now a tensor. if not isinstance(scores, torch.Tensor): raise TypeError( "Must have list, np.ndarray, or torch.Tensor of " f"scores. Got type {type(scores)}" ) # Validation check on model score dimensions if scores.ndim == 1: # Unsqueeze prediction, if it's been squeezed by the model. if len(inputs) == 1: scores = scores.unsqueeze(dim=0) else: raise ValueError( f"Model return score of shape {scores.shape} for {len(inputs)} inputs." ) elif scores.ndim != 2: # If model somehow returns too may dimensions, throw an error. raise ValueError( f"Model return score of shape {scores.shape} for {len(inputs)} inputs." ) elif scores.shape[0] != len(inputs): # If model returns an incorrect number of scores, throw an error. raise ValueError( f"Model return score of shape {scores.shape} for {len(inputs)} inputs." ) elif not ((scores.sum(dim=1) - 1).abs() < 1e-6).all(): # Values in each row should sum up to 1. The model should return a # set of numbers corresponding to probabilities, which should add # up to 1. Since they are `torch.float` values, allow a small # error in the summation. scores = torch.nn.functional.softmax(scores, dim=1) if not ((scores.sum(dim=1) - 1).abs() < 1e-6).all(): raise ValueError("Model scores do not add up to 1.") return scores.cpu() def _goal_function_result_type(self): """Returns the class of this goal function's results.""" return ClassificationGoalFunctionResult def extra_repr_keys(self): return [] def _get_displayed_output(self, raw_output): return int(raw_output.argmax())