Add print statements
modeling_cogvlm.py CHANGED (+6 -1)
@@ -290,6 +290,11 @@ class VisionExpertAttention(nn.Module):
         context_layer = attention_fn(
             query_layer=query_states, key_layer=key_states, value_layer=value_states, attention_mask=attention_mask,
             scaling_attention_score=True, attention_dropout=None)
+
+        if print_values:
+            print("Shape of context_layer:", context_layer.shape)
+            print("First values of context_layer:", context_layer[0,0,:3,:3])
+
         if context_layer.size() != (bsz, self.num_heads, q_len, self.head_dim):
             raise ValueError(
                 f"`attn_output` should be of size {(bsz, self.num_heads, q_len, self.head_dim)}, but is"
@@ -657,7 +662,7 @@ class CogVLMModel(CogVLMPreTrainedModel):
                 past_key_value=past_key_value,
                 output_attentions=output_attentions,
                 use_cache=use_cache,
-                print_values=idx==0,
+                print_values=idx==0 and step==1,
             )
             hidden_states = layer_outputs[0]
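For context: the prints are gated behind a `print_values` flag so that only the first decoder layer (`idx == 0`) on the first generation step (`step == 1`) logs its attention output. The `step` counter itself is not defined in these hunks. Below is a minimal, self-contained sketch of the same guarded debug-print pattern, using placeholder modules and a hypothetical `self.step` counter rather than the real CogVLM classes.

import torch
from torch import nn


class TinyAttention(nn.Module):
    # Placeholder for VisionExpertAttention: passes hidden_states through
    # but reproduces the guarded debug prints added in this commit.
    def forward(self, hidden_states, print_values=False):
        context_layer = hidden_states  # stand-in for the real attention output
        if print_values:
            print("Shape of context_layer:", context_layer.shape)
            print("First values of context_layer:", context_layer[0, 0, :3, :3])
        return context_layer


class TinyModel(nn.Module):
    # Placeholder for CogVLMModel's decoder-layer loop.
    def __init__(self, num_layers=2):
        super().__init__()
        self.layers = nn.ModuleList([TinyAttention() for _ in range(num_layers)])
        self.step = 0  # hypothetical counter; the commit references a `step` name not shown in these hunks

    def forward(self, hidden_states):
        self.step += 1
        for idx, layer in enumerate(self.layers):
            # only the first layer on the first forward/generation step logs its values
            hidden_states = layer(hidden_states, print_values=idx == 0 and self.step == 1)
        return hidden_states


model = TinyModel()
x = torch.randn(1, 2, 4, 4)  # dummy (batch, heads, seq, head_dim) tensor
model(x)  # prints shape and first values from layer 0
model(x)  # later steps print nothing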