Martijn van Beers committed
Commit 4f67e27
1 Parent(s): 66d5973

Add 'classic' rollout

app.py CHANGED
@@ -8,27 +8,30 @@ sys.path.append("lib")
 import torch
 
 from roberta2 import RobertaForSequenceClassification
+from transformers import AutoTokenizer
+
 from gradient_rollout import GradientRolloutExplainer
+from rollout import RolloutExplainer
 from integrated_gradients import IntegratedGradientsExplainer
-from transformers import AutoModelForSequenceClassification
-from transformers import AutoTokenizer
-from captum.attr import LayerIntegratedGradients
-from captum.attr import visualization
-import util
-import torch
 
-ig_explainer = IntegratedGradientsExplainer()
-gr_explainer = GradientRolloutExplainer()
+device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+model = RobertaForSequenceClassification.from_pretrained("textattack/roberta-base-SST-2").to(device)
+tokenizer = AutoTokenizer.from_pretrained("textattack/roberta-base-SST-2")
+
+ig_explainer = IntegratedGradientsExplainer(model, tokenizer)
+gr_explainer = GradientRolloutExplainer(model, tokenizer)
+ro_explainer = RolloutExplainer(model, tokenizer)
 
-def run(sent, rollout, ig, ig_baseline):
-    a = gr_explainer(sent, rollout)
-    b = ig_explainer(sent, ig, ig_baseline)
-    return a, b
+def run(sent, gradient, rollout, ig, ig_baseline):
+    a = gr_explainer(sent, gradient)
+    b = ro_explainer(sent, rollout)
+    c = ig_explainer(sent, ig, ig_baseline)
+    return a, b, c
 
 examples = pandas.read_csv("examples.csv").to_numpy().tolist()
 
 with gradio.Blocks(title="Explanations with attention rollout") as iface:
-    util.Markdown(pathlib.Path("description.md"))
+    gradio.Markdown(pathlib.Path("description.md").read_text)
     with gradio.Row(equal_height=True):
         with gradio.Column(scale=4):
             sent = gradio.Textbox(label="Input sentence")
@@ -36,19 +39,54 @@ with gradio.Blocks(title="Explanations with attention rollout") as iface:
             but = gradio.Button("Submit")
     with gradio.Row(equal_height=True):
         with gradio.Column():
-            rollout_layer = gradio.Slider(minimum=0, maximum=12, value=8, step=1, label="Select rollout start layer")
+            rollout_layer = gradio.Slider(
+                minimum=1,
+                maximum=12,
+                value=1,
+                step=1,
+                label="Select rollout start layer"
+            )
+        with gradio.Column():
+            gradient_layer = gradio.Slider(
+                minimum=1,
+                maximum=12,
+                value=8,
+                step=1,
+                label="Select gradient rollout start layer"
+            )
+        with gradio.Column():
+            ig_layer = gradio.Slider(
+                minimum=0,
+                maximum=12,
+                value=0,
+                step=1,
+                label="Select IG layer"
+            )
+            ig_baseline = gradio.Dropdown(
+                label="Baseline token",
+                choices=['Unknown', 'Padding'], value="Unknown"
+            )
+    with gradio.Row(equal_height=True):
+        with gradio.Column():
+            gradio.Markdown("### Attention Rollout")
             rollout_result = gradio.HTML()
         with gradio.Column():
-            ig_layer = gradio.Slider(minimum=0, maximum=12, value=0, step=1, label="Select IG layer")
-            ig_baseline = gradio.Dropdown(label="Baseline token", choices=['Unknown', 'Padding'], value="Unknown")
+            gradio.Markdown("### Gradient-weighted Attention Rollout")
+            gradient_result = gradio.HTML()
+        with gradio.Column():
+            gradio.Markdown("### Layer-Integrated Gradients")
             ig_result = gradio.HTML()
     gradio.Examples(examples, [sent])
     with gradio.Accordion("Some more details"):
-        util.Markdown(pathlib.Path("notice.md"))
+        gradio.Markdown(pathlib.Path("notice.md").read_text)
 
-    rollout_layer.change(gr_explainer, [sent, rollout_layer], rollout_result)
+    gradient_layer.change(gr_explainer, [sent, gradient_layer], gradient_result)
+    rollout_layer.change(ro_explainer, [sent, rollout_layer], rollout_result)
    ig_layer.change(ig_explainer, [sent, ig_layer, ig_baseline], ig_result)
-    but.click(run, [sent, rollout_layer, ig_layer, ig_baseline], [rollout_result, ig_result])
+    but.click(run,
+        inputs=[sent, gradient_layer, rollout_layer, ig_layer, ig_baseline],
+        outputs=[gradient_result, rollout_result, ig_result]
+    )
 
 
 iface.launch()
lib/ExplanationGenerator.py CHANGED
@@ -25,8 +25,8 @@ class Generator:
         self.key = key
         self.model.eval()
 
-    def forward(self, input_ids, attention_mask):
-        return self.model(input_ids, attention_mask)
+    def tokens_from_ids(self, ids):
+        return list(map(lambda s: s[1:] if s[0] == "Ġ" else s, self.tokenizer.convert_ids_to_tokens(ids)))
 
     def _calculate_gradients(self, output, index, do_relprop=True):
         if index == None:
@@ -72,7 +72,6 @@ class Generator:
         rollout[:, 0, 0] = rollout[:, 0].min()
         return rollout[:, 0]
 
-
     def generate_LRP_last_layer(self, input_ids, attention_mask,
                                 index=None):
         output = self.model(input_ids=input_ids, attention_mask=attention_mask)[0]
@@ -117,7 +116,7 @@ class Generator:
             all_layer_attentions.append(avg_heads)
         rollout = compute_rollout_attention(all_layer_attentions, start_layer=start_layer)
         rollout[:, 0, 0] = 0
-        return rollout[:, 0]
+        return output, rollout[:, 0]
 
     def generate_attn_gradcam(self, input_ids, attention_mask, index=None):
         output = self.model(input_ids=input_ids, attention_mask=attention_mask)[0]
@@ -148,12 +147,14 @@ class Generator:
             return torch.matmul(cam_ss, R_ss)
 
         output = self.model(input_ids=input_ids, attention_mask=attention_mask)[0]
-        blocks = _get_module_from_name(self.model, self.key)
+
+        self._calculate_gradients(output, index, do_relprop=False)
 
         num_tokens = input_ids.size(-1)
         R = torch.eye(num_tokens).expand(output.size(0), -1, -1).clone().to(output.device)
 
-        for i, blk in enumerate(model.roberta.encoder.layer):
+        blocks = _get_module_from_name(self.model, self.key)
+        for i, blk in enumerate(blocks):
             if i < start_layer:
                 continue
             grad = blk.attention.self.get_attn_gradients().detach()
@@ -161,5 +162,7 @@ class Generator:
             cam = avg_heads(cam, grad)
             joint = apply_self_attention_rules(R, cam)
             R += joint
-        return R[:, 0, 1:-1]
+        # 0 because we look at the influence *on* the CLS token
+        # 1:-1 because we don't want the influence *from* the CLS/SEP tokens
+        return output, R[:, 0, 1:-1]
 
lib/gradient_rollout.py CHANGED
@@ -4,68 +4,22 @@ from captum.attr import visualization
 
 from roberta2 import RobertaForSequenceClassification
 from util import visualize_text, PyTMinMaxScalerVectorized
+from ExplanationGenerator import Generator
 
 classifications = ["NEGATIVE", "POSITIVE"]
 
-class GradientRolloutExplainer:
-    def __init__(self):
-        self.device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
-        self.model = RobertaForSequenceClassification.from_pretrained("textattack/roberta-base-SST-2").to(self.device)
-        self.model.eval()
-        self.tokenizer = AutoTokenizer.from_pretrained("textattack/roberta-base-SST-2")
-
-    def tokens_from_ids(self, ids):
-        return list(map(lambda s: s[1:] if s[0] == "Ġ" else s, self.tokenizer.convert_ids_to_tokens(ids)))
-
-    def run_attribution_model(self, input_ids, attention_mask, index=None, start_layer=0):
-        def avg_heads(cam, grad):
-            cam = (grad * cam).clamp(min=0).mean(dim=-3)
-            # set negative values to 0, then average
-            # cam = cam.clamp(min=0).mean(dim=0)
-            return cam
-
-        def apply_self_attention_rules(R_ss, cam_ss):
-            R_ss_addition = torch.matmul(cam_ss, R_ss)
-            return R_ss_addition
-
-        output = self.model(input_ids=input_ids, attention_mask=attention_mask)[0]
-        if index == None:
-            # index = np.expand_dims(np.arange(input_ids.shape[1])
-            # by default explain the class with the highest score
-            index = output.argmax(axis=-1).detach().cpu().numpy()
-
-        # create a one-hot vector selecting class we want explanations for
-        one_hot = (
-            torch.nn.functional.one_hot(
-                torch.tensor(index, dtype=torch.int64), num_classes=output.size(-1)
-            )
-            .to(torch.float)
-            .requires_grad_(True)
-        ).to(self.device)
-        one_hot = torch.sum(one_hot * output)
-        self.model.zero_grad()
-        # create the gradients for the class we're interested in
-        one_hot.backward(retain_graph=True)
-
-        num_tokens = self.model.roberta.encoder.layer[0].attention.self.get_attn().shape[-1]
-        R = torch.eye(num_tokens).expand(output.size(0), -1, -1).clone().to(self.device)
-
-        for i, blk in enumerate(self.model.roberta.encoder.layer):
-            if i < start_layer:
-                continue
-            grad = blk.attention.self.get_attn_gradients()
-            cam = blk.attention.self.get_attn()
-            cam = avg_heads(cam, grad)
-            joint = apply_self_attention_rules(R, cam)
-            R += joint
-        return output, R[:, 0, 1:-1]
+class GradientRolloutExplainer(Generator):
+    def __init__(self, model, tokenizer):
+        super().__init__(model, key="roberta.encoder.layer")
+        self.device = model.device
+        self.tokenizer = tokenizer
 
     def build_visualization(self, input_ids, attention_mask, index=None, start_layer=8):
         # generate an explanation for the input
         vis_data_records = []
 
         for index in range(2):
-            output, expl = self.run_attribution_model(
+            output, expl = self.generate_rollout_attn_gradcam(
                 input_ids, attention_mask, index=index, start_layer=start_layer
             )
             # normalize scores
lib/integrated_gradients.py CHANGED
@@ -6,15 +6,17 @@ from transformers import AutoTokenizer
 from captum.attr import LayerIntegratedGradients
 from captum.attr import visualization
 
+from roberta2 import RobertaForSequenceClassification
+from ExplanationGenerator import Generator
 from util import visualize_text
 
 classifications = ["NEGATIVE", "POSITIVE"]
 
 class IntegratedGradientsExplainer:
-    def __init__(self):
-        self.device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
-        self.model = AutoModelForSequenceClassification.from_pretrained("textattack/roberta-base-SST-2").to(self.device)
-        self.tokenizer = AutoTokenizer.from_pretrained("textattack/roberta-base-SST-2")
+    def __init__(self, model, tokenizer):
+        self.model = model
+        self.device = model.device
+        self.tokenizer = tokenizer
         self.baseline_map = {
             'Unknown': self.tokenizer.unk_token_id,
             'Padding': self.tokenizer.pad_token_id,
lib/rollout.py ADDED
@@ -0,0 +1,67 @@
+import torch
+from transformers import AutoTokenizer
+from captum.attr import visualization
+
+from roberta2 import RobertaForSequenceClassification
+from ExplanationGenerator import Generator
+from util import visualize_text, PyTMinMaxScalerVectorized
+
+classifications = ["NEGATIVE", "POSITIVE"]
+
+class RolloutExplainer(Generator):
+    def __init__(self, model, tokenizer):
+        super().__init__(model, key="roberta.encoder.layer")
+        self.device = model.device
+        self.tokenizer = tokenizer
+
+    def build_visualization(self, input_ids, attention_mask, start_layer=8):
+        # generate an explanation for the input
+        vis_data_records = []
+
+        output, expl = self.generate_rollout(
+            input_ids, attention_mask, start_layer=start_layer
+        )
+        # normalize scores
+        scaler = PyTMinMaxScalerVectorized()
+
+        norm = scaler(expl)
+        # get the model classification
+        output = torch.nn.functional.softmax(output, dim=-1)
+
+        for record in range(input_ids.size(0)):
+            classification = output[record].argmax(dim=-1).item()
+            class_name = classifications[classification]
+            nrm = norm[record]
+
+            # if the classification is negative, higher explanation scores are more negative
+            # flip for visualization
+            if class_name == "NEGATIVE":
+                nrm *= -1
+            tokens = self.tokens_from_ids(input_ids[record].flatten())[
+                1 : 0 - ((attention_mask[record] == 0).sum().item() + 1)
+            ]
+            vis_data_records.append(
+                visualization.VisualizationDataRecord(
+                    nrm,
+                    output[record][classification],
+                    classification,
+                    classification,
+                    classification,
+                    1,
+                    tokens,
+                    1,
+                )
+            )
+        return visualize_text(vis_data_records)
+
+    def __call__(self, input_text, start_layer=8):
+        if start_layer > 0:
+            start_layer -= 1
+
+        text_batch = [input_text]
+        encoding = self.tokenizer(text_batch, return_tensors="pt")
+        input_ids = encoding["input_ids"].to(self.device)
+        attention_mask = encoding["attention_mask"].to(self.device)
+
+        return self.build_visualization(input_ids, attention_mask, start_layer=int(start_layer))
+
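
For reference, a minimal sketch of how the newly added RolloutExplainer can be driven outside the Gradio UI. This is not part of the commit: the example sentence is invented, and everything else (module paths, constructor arguments, the start_layer argument) is taken from the diff above.

import sys
sys.path.append("lib")  # the explainer modules live under lib/, as in app.py

import torch
from transformers import AutoTokenizer

from roberta2 import RobertaForSequenceClassification
from rollout import RolloutExplainer

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = RobertaForSequenceClassification.from_pretrained("textattack/roberta-base-SST-2").to(device)
tokenizer = AutoTokenizer.from_pretrained("textattack/roberta-base-SST-2")

ro_explainer = RolloutExplainer(model, tokenizer)
# __call__ tokenizes the sentence and returns an HTML visualization of plain
# ('classic') attention rollout, starting the rollout at the given layer.
html = ro_explainer("A warm, funny and ultimately winning little film.", start_layer=1)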