nielsr (HF staff) committed
Commit c0128c2
1 Parent(s): bb99c8c

Add print statements

Files changed (2):
  1. modeling_cogvlm.py (+10 -12)
  2. visual.py (+12 -3)
modeling_cogvlm.py CHANGED
@@ -296,8 +296,8 @@ class CogVLMDecoderLayer(nn.Module):
 
         hidden_states = self.input_layernorm(hidden_states)
 
-        if print_values:
-            print("Hidden states before self attention:", hidden_states[0,:3,:3])
+        # if print_values:
+        #     print("Hidden states before self attention:", hidden_states[0,:3,:3])
 
         # Self Attention
         hidden_states, self_attn_weights, present_key_value = self.self_attn(
@@ -310,8 +310,8 @@
             use_cache=use_cache,
         )
 
-        if print_values:
-            print("Hidden states after self attention:", hidden_states[0,:3,:3])
+        # if print_values:
+        #     print("Hidden states after self attention:", hidden_states[0,:3,:3])
 
         hidden_states = residual + hidden_states
 
@@ -464,12 +464,12 @@ class CogVLMModel(CogVLMPreTrainedModel):
                 repo_type="dataset",
             )
 
-            print("First values of text embeddings:", inputs_embeds[0, :3, :3])
-            print("First values of images_features:", images_features[0, :3])
+            # print("First values of text embeddings:", inputs_embeds[0, :3, :3])
+            # print("First values of images_features:", images_features[0, :3])
 
             inputs_embeds = inputs_embeds.index_put([token_type_ids == VISION_TOKEN_TYPE], images_features)
 
-            print("First values of inputs_embeds after index_put:", inputs_embeds[0, :3, :3])
+            # print("First values of inputs_embeds after index_put:", inputs_embeds[0, :3, :3])
 
         else: # single-modality
             if token_type_ids is None:
@@ -542,8 +542,6 @@ class CogVLMModel(CogVLMPreTrainedModel):
         else:
             position_ids = position_ids.view(-1, seq_length).long()
 
-        print("Input ids:", input_ids)
-
         if inputs_embeds is None:
             inputs_embeds = self.embed_tokens(input_ids)
         # embed positions
@@ -578,9 +576,9 @@ class CogVLMModel(CogVLMPreTrainedModel):
             if output_hidden_states:
                 all_hidden_states += (hidden_states,)
 
-            if idx in [0, 1, 2]:
-                print(f"Hidden states before layer {idx}", hidden_states[0,:3,:3])
-                print(f"Mean of hidden states before layer {idx}", hidden_states.mean())
+            # if idx in [0, 1, 2]:
+            #     print(f"Hidden states before layer {idx}", hidden_states[0,:3,:3])
+            #     print(f"Mean of hidden states before layer {idx}", hidden_states.mean())
 
             past_key_value = past_key_values[idx] if past_key_values is not None else None
             layer_outputs = decoder_layer(
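
The prints commented out above inspect the `index_put` call, which splices the vision features into the text embedding sequence: a boolean mask over `token_type_ids` selects the vision-token positions, and `index_put` writes one row of `images_features` into each selected slot, in order. A minimal standalone sketch of that mechanism (the tensor shapes and the `VISION_TOKEN_TYPE` value of 1 are illustrative assumptions, not taken from the repository):

import torch

VISION_TOKEN_TYPE = 1  # assumed value, for illustration only

# Hypothetical shapes: batch 1, sequence length 6, hidden size 4.
inputs_embeds = torch.zeros(1, 6, 4)                 # text embeddings
token_type_ids = torch.tensor([[0, 1, 1, 1, 0, 0]])  # three vision tokens
images_features = torch.ones(3, 4)                   # one row per vision token

# The boolean mask selects the vision positions; index_put (the
# out-of-place variant) writes one row of images_features into each
# masked slot, in order.
inputs_embeds = inputs_embeds.index_put(
    [token_type_ids == VISION_TOKEN_TYPE], images_features
)

print(inputs_embeds[0, :, 0])  # tensor([0., 1., 1., 1., 0., 0.])
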
visual.py CHANGED
@@ -74,9 +74,18 @@ class TransformerLayer(nn.Module):
         self.mlp = MLP(config)
         self.post_attention_layernorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
 
-    def forward(self, hidden_states):
+    def forward(self, hidden_states, print_values=False):
         attention_input = hidden_states
-        attention_output = self.input_layernorm(self.attention(attention_input))
+
+        if print_values:
+            print("Hidden states before attention:", attention_input[0, :3, :3])
+
+        attention_output = self.attention(attention_input)
+
+        if print_values:
+            print("Hidden states after attention:", attention_output[0, :3, :3])
+
+        attention_output = self.input_layernorm(attention_output)
         hidden_states = attention_input + attention_output
         mlp_input = hidden_states
         mlp_output = self.post_attention_layernorm(self.mlp(mlp_input))
@@ -105,7 +114,7 @@ class Transformer(nn.Module):
         )
 
         for idx, layer_module in enumerate(self.layers):
-            hidden_states = layer_module(hidden_states)
+            hidden_states = layer_module(hidden_states, print_values=idx == 0)
 
         print("Shape of hidden states after CLIP:", hidden_states.shape)
         torch.save(hidden_states, "hidden_states_after_clip.pt")
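
The `print_values=idx == 0` argument gates the new debug prints so that only the first transformer layer emits them, keeping the output readable. A self-contained sketch of that gating pattern, using a stand-in layer rather than the repository's `TransformerLayer`:

import torch
import torch.nn as nn

class DebugLayer(nn.Module):
    """Stand-in layer that can dump a small slice of its activations."""

    def __init__(self, hidden_size=4):
        super().__init__()
        self.linear = nn.Linear(hidden_size, hidden_size)

    def forward(self, hidden_states, print_values=False):
        if print_values:
            # Print a 3x3 corner rather than the full tensor,
            # mirroring the hidden_states[0, :3, :3] slices above.
            print("Hidden states before layer:", hidden_states[0, :3, :3])
        return self.linear(hidden_states)

layers = nn.ModuleList([DebugLayer() for _ in range(3)])
hidden_states = torch.randn(1, 5, 4)
for idx, layer_module in enumerate(layers):
    # Only layer 0 prints, as in the loop above.
    hidden_states = layer_module(hidden_states, print_values=idx == 0)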