Update bert_layers.py
bert_layers.py (+5 -5)
@@ -495,7 +495,7 @@ class BertEncoder(nn.Module):
         all_attention_weights.append(attention_weights)  # JAANDOUI: appending the attention of different layers together.
         # print(f'here is the matrix of attentions inside encoder: \n {all_attention_weights}')
         # print(f'and this is the shape inside encoder: \n {all_attention_weights.shape}')
-        print(f'NUMBER6: {all_attention_weights}')
+        # print(f'NUMBER6: {all_attention_weights}')
         if not output_all_encoded_layers:
             all_encoder_layers.append(hidden_states)
 
@@ -632,7 +632,7 @@ class BertModel(BertPreTrainedModel):
             attention_mask,
             output_all_encoded_layers=output_all_encoded_layers,
             subset_mask=subset_mask)
-        print(f'NUMBER7: {all_attention_weights}')
+        # print(f'NUMBER7: {all_attention_weights}')
         # print(f'here is the matrix of attentions in BERT: \n {all_attention_weights}')
         # print(f'and this is the shape in BERT: \n {all_attention_weights.shape}')
 
@@ -658,11 +658,11 @@ class BertModel(BertPreTrainedModel):
 
         # JAANDOUI: returning all_attention_weights too
         if self.pooler is not None:
-            print(f'NUMBER8: {all_attention_weights}')
+            # print(f'NUMBER8: {all_attention_weights}')
             return encoder_outputs, pooled_output, all_attention_weights
 
         # JAANDOUI: returning all_attention_weights too
-        print(f'NUMBER9: {all_attention_weights}')
+        # print(f'NUMBER9: {all_attention_weights}')
         return encoder_outputs, None, all_attention_weights
         # JAANDOUI: need to handle the returned elements wherever BertModel is instantiated.
 
@@ -903,7 +903,7 @@ class BertForSequenceClassification(BertPreTrainedModel):
 
         # JAANDOUI:
         all_attention_weights = outputs[2]
-        print(f'last: {all_attention_weights}')
+        # print(f'last: {all_attention_weights}')
 
         pooled_output = self.dropout(pooled_output)
         logits = self.classifier(pooled_output)
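A minimal usage sketch follows, since the JAANDOUI notes above point out that the patched BertModel now returns a third element, all_attention_weights, alongside encoder_outputs and pooled_output, and that BertForSequenceClassification reads it back as outputs[2]. Only that three-element return contract comes from the diff; the imports, config and input construction, and the stacking step below are assumptions made for illustration.

import torch
from transformers import BertConfig  # assumed stand-in; the repo may define its own config class
from bert_layers import BertModel    # this repo's modified module (import path assumed)

# Assumed setup: a default config and a dummy batch; shapes are illustrative only.
config = BertConfig()
model = BertModel(config)
input_ids = torch.randint(0, config.vocab_size, (1, 128))   # (batch, seq_len)
attention_mask = torch.ones_like(input_ids)

with torch.no_grad():
    # The diff shows the patched forward returning three values.
    encoder_outputs, pooled_output, all_attention_weights = model(
        input_ids, attention_mask=attention_mask)

# all_attention_weights is built up layer by layer inside BertEncoder, so it is a
# list with one entry per encoder layer. Stacking assumes each layer yields a
# tensor of identical shape, which the diff itself does not guarantee.
per_layer = torch.stack(all_attention_weights)
print(per_layer.shape)

As the final JAANDOUI comment says, any other caller that still unpacks only two values from BertModel will need the same adjustment.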