nielsr HF staff committed on
Commit
3dfe8fb
1 Parent(s): 4b6d7c1

Add print statements

Browse files
Files changed (1) hide show
  1. modeling_cogvlm.py +5 -9
modeling_cogvlm.py CHANGED
@@ -296,8 +296,8 @@ class CogVLMDecoderLayer(nn.Module):
296
 
297
  hidden_states = self.input_layernorm(hidden_states)
298
 
299
- # if print_values:
300
- # print("Hidden states before self attention:", hidden_states[0,:3,:3])
301
 
302
  # Self Attention
303
  hidden_states, self_attn_weights, present_key_value = self.self_attn(
@@ -310,8 +310,8 @@ class CogVLMDecoderLayer(nn.Module):
310
  use_cache=use_cache,
311
  )
312
 
313
- # if print_values:
314
- # print("Hidden states after self attention:", hidden_states[0,:3,:3])
315
 
316
  hidden_states = residual + hidden_states
317
 
@@ -600,10 +600,6 @@ class CogVLMModel(CogVLMPreTrainedModel):
600
  if output_hidden_states:
601
  all_hidden_states += (hidden_states,)
602
 
603
- # if idx in [0, 1, 2]:
604
- # print(f"Hidden states before layer {idx}", hidden_states[0,:3,:3])
605
- # print(f"Mean of hidden states before layer {idx}", hidden_states.mean())
606
-
607
  past_key_value = past_key_values[idx] if past_key_values is not None else None
608
  layer_outputs = decoder_layer(
609
  hidden_states,
@@ -613,7 +609,7 @@ class CogVLMModel(CogVLMPreTrainedModel):
613
  past_key_value=past_key_value,
614
  output_attentions=output_attentions,
615
  use_cache=use_cache,
616
- print_values=idx in [0, 1, 2],
617
  )
618
  hidden_states = layer_outputs[0]
619
 
 
296
 
297
  hidden_states = self.input_layernorm(hidden_states)
298
 
299
+ if print_values:
300
+ print("Hidden states before self attention:", hidden_states[0,:3,:3])
301
 
302
  # Self Attention
303
  hidden_states, self_attn_weights, present_key_value = self.self_attn(
 
310
  use_cache=use_cache,
311
  )
312
 
313
+ if print_values:
314
+ print("Hidden states after self attention:", hidden_states[0,:3,:3])
315
 
316
  hidden_states = residual + hidden_states
317
 
 
600
  if output_hidden_states:
601
  all_hidden_states += (hidden_states,)
602
 
 
 
 
 
603
  past_key_value = past_key_values[idx] if past_key_values is not None else None
604
  layer_outputs = decoder_layer(
605
  hidden_states,
 
609
  past_key_value=past_key_value,
610
  output_attentions=output_attentions,
611
  use_cache=use_cache,
612
+ print_values=idx==0,
613
  )
614
  hidden_states = layer_outputs[0]
615