nikigoli committed
Commit 8e052dd
Parent: a5e9c89

More debug printing inside transformer encoder

Files changed (1):
  models/GroundingDINO/transformer.py  +12 -1
models/GroundingDINO/transformer.py CHANGED
@@ -560,7 +560,7 @@ class TransformerEncoder(nn.Module):
         """
 
         output = src
-
+        print("inside transformer encoder")
         # preparation and reshape
         if self.num_layers > 0:
             reference_points = self.get_reference_points(
@@ -591,8 +591,10 @@
             # if output.isnan().any() or memory_text.isnan().any():
             # if os.environ.get('IPDB_SHILONG_DEBUG', None) == 'INFO':
             # import ipdb; ipdb.set_trace()
+            print("layer_id: " + str(layer_id))
             if self.fusion_layers:
                 if self.use_checkpoint:
+                    print("using checkpoint")
                     output, memory_text = checkpoint.checkpoint(
                         self.fusion_layers[layer_id],
                         output,
@@ -600,24 +602,30 @@
                         key_padding_mask,
                         text_attention_mask,
                     )
+                    print("got checkpoint output")
                 else:
+                    print("not using checkpoint")
                     output, memory_text = self.fusion_layers[layer_id](
                         v=output,
                         l=memory_text,
                         attention_mask_v=key_padding_mask,
                         attention_mask_l=text_attention_mask,
                     )
+                    print("got fusion output")
 
             if self.text_layers:
+                print("getting text layers")
                 memory_text = self.text_layers[layer_id](
                     src=memory_text.transpose(0, 1),
                     src_mask=~text_self_attention_masks,  # note we use ~ for mask here
                     src_key_padding_mask=text_attention_mask,
                     pos=(pos_text.transpose(0, 1) if pos_text is not None else None),
                 ).transpose(0, 1)
+                print("got text output")
 
             # main process
             if self.use_transformer_ckpt:
+                print("use transformer ckpt")
                 output = checkpoint.checkpoint(
                     layer,
                     output,
@@ -627,7 +635,9 @@
                     level_start_index,
                     key_padding_mask,
                 )
+                print("got output")
             else:
+                print("not use transformer ckpt")
                 output = layer(
                     src=output,
                     pos=pos,
@@ -636,6 +646,7 @@
                     level_start_index=level_start_index,
                     key_padding_mask=key_padding_mask,
                 )
+                print("got output")
 
         return output, memory_text
 
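
For reference, below is a minimal standalone sketch (not part of this commit) of the gradient-checkpointing pattern that the use_checkpoint / use_transformer_ckpt branches toggle around checkpoint.checkpoint: the layer's forward is re-run during the backward pass instead of its activations being stored. TinyLayer, dim, and the loop setup here are made-up stand-ins for illustration, not GroundingDINO code.

# Illustrative only: a per-layer loop using torch.utils.checkpoint,
# analogous to the encoder's checkpointed branches in the diff above.
import torch
import torch.nn as nn
import torch.utils.checkpoint as checkpoint


class TinyLayer(nn.Module):
    # Hypothetical stand-in for a single encoder/fusion layer.
    def __init__(self, dim=32):
        super().__init__()
        self.linear = nn.Linear(dim, dim)

    def forward(self, x):
        return torch.relu(self.linear(x))


layers = nn.ModuleList(TinyLayer() for _ in range(3))
output = torch.randn(4, 32, requires_grad=True)
use_checkpoint = True  # analogous to self.use_checkpoint / self.use_transformer_ckpt

for layer_id, layer in enumerate(layers):
    print("layer_id: " + str(layer_id))  # same tracing style as the commit
    if use_checkpoint:
        # checkpoint.checkpoint(fn, *args) discards intermediate activations
        # and re-runs fn during backward, trading extra compute for memory.
        output = checkpoint.checkpoint(layer, output, use_reentrant=False)
    else:
        output = layer(output)

output.sum().backward()  # gradients flow through the recomputed layers

The prints added in this commit bracket exactly these two call paths per layer (checkpointed vs. direct), which is why the log alternates between "using checkpoint" / "not using checkpoint" and the corresponding "got ... output" lines.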