Update bert_layers.py
bert_layers.py (+5 -2)
```diff
@@ -485,7 +485,8 @@ class BertEncoder(nn.Module):
                 attn_mask=attention_mask,
                 bias=alibi_attn_mask)
             all_attention_weights.append(attention_weights)  # Store attention weights
-            print(all_attention_weights)
+            print(f'here is the matrix of attentions inside encoder: \n {all_attention_weights}')
+            print(f'and this is the shape inside encoder: \n {all_attention_weights.shape}')
 
         if not output_all_encoded_layers:
             all_encoder_layers.append(hidden_states)
@@ -613,11 +614,13 @@ class BertModel(BertPreTrainedModel):
         first_col_mask[:, 0] = True
         subset_mask = masked_tokens_mask | first_col_mask
 
-        encoder_outputs = self.encoder(
+        encoder_outputs, all_attentions = self.encoder(
             embedding_output,
             attention_mask,
             output_all_encoded_layers=output_all_encoded_layers,
             subset_mask=subset_mask)
+        print(f'here is the matrix of attentions in BERT: \n {all_attention_weights}')
+        print(f'and this is the shape in BERT: \n {all_attention_weights.shape}')
 
         if masked_tokens_mask is None:
             sequence_output = encoder_outputs[-1]
```
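On the model side, the new prints reference `all_attention_weights`, but the name unpacked from the encoder is `all_attentions`; as written they would raise `NameError` unless `all_attention_weights` is defined elsewhere in `BertModel.forward`. A sketch of what the call site presumably intends, under that assumption (`encode_with_attentions` is a hypothetical wrapper, not part of the commit):

```python
import torch

def encode_with_attentions(encoder, embedding_output, attention_mask,
                           output_all_encoded_layers, subset_mask):
    # The patched encoder is assumed to return (encoder_outputs, all_attentions).
    encoder_outputs, all_attentions = encoder(
        embedding_output,
        attention_mask,
        output_all_encoded_layers=output_all_encoded_layers,
        subset_mask=subset_mask)
    # Use the unpacked name, and stack the per-layer list so .shape is defined.
    attn = torch.stack(all_attentions, dim=0)
    print(f'here is the matrix of attentions in BERT:\n{attn}')
    print(f'and this is the shape in BERT:\n{attn.shape}')
    return encoder_outputs, attn
```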
|