JesusCrist committed on
Commit cf77e31 · 1 parent: 1bb50bc

Update modeling.py


There is currently a bug where manually setting the device of the sentence transformer to a non-zero device triggers an error about tensors not being on the same device. This modification passes the device during the initialization of attention_bias and does not introduce additional risk or inference-time overhead.
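For context, a minimal reproduction sketch of the failure mode, assuming a checkpoint that loads this modeling.py via trust_remote_code; the model name and device index below are illustrative placeholders, not taken from this commit:

from sentence_transformers import SentenceTransformer

# Hypothetical repro sketch: the model name and device index are placeholders.
# Placing the model on any non-default GPU (e.g. cuda:1) is what exposed the
# bug: the attention bias was built on the default device, so the attention
# computation mixed tensors from two devices and raised a
# "tensors not on the same device" style RuntimeError.
model = SentenceTransformer(
    "your-org/your-gte-style-model",  # placeholder for a checkpoint using this modeling.py
    trust_remote_code=True,
    device="cuda:1",
)
embeddings = model.encode(["a sentence long enough to exercise the attention path"])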

Files changed (1)
  1. modeling.py  +2 -2
modeling.py CHANGED
@@ -897,11 +897,11 @@ class NewModel(NewPreTrainedModel):
 
         if unpad_inputs:
             assert self.config.use_memory_efficient_attention
-            attention_bias = xops.fmha.attn_bias.BlockDiagonalMask.from_seqlens(length)
+            attention_bias = xops.fmha.attn_bias.BlockDiagonalMask.from_seqlens(length, device=self.device)
         else:
             # We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length]
             # ourselves in which case we just need to make it broadcastable to all heads.
-            attention_bias = self.get_extended_attention_mask(attention_mask, input_shape)
+            attention_bias = self.get_extended_attention_mask(attention_mask, input_shape, device=self.device)
             if self.config.use_memory_efficient_attention:
                 # Invalid shape for attention bias: torch.Size([48, 1, 1, 512]) (expected (48, 12, 512, 512))
                 attention_bias = attention_bias.expand(-1, self.config.num_attention_heads, seq_length, -1)
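
The underlying idea, independent of xformers, is simply to build auxiliary tensors such as attention masks on the module's own device instead of the framework default. A minimal sketch of that pattern with hypothetical names (ToyAttention is not part of this repository):

import torch
import torch.nn as nn

class ToyAttention(nn.Module):
    """Toy module illustrating the device-aware mask pattern (not the repo's code)."""

    def __init__(self, hidden_size: int = 8):
        super().__init__()
        self.qkv = nn.Linear(hidden_size, hidden_size * 3)

    @property
    def device(self) -> torch.device:
        # Same idea as self.device in the patched modeling.py: derive the
        # device from the module's parameters instead of assuming the default GPU.
        return next(self.parameters()).device

    def forward(self, x: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
        q, k, v = self.qkv(x).chunk(3, dim=-1)
        scores = q @ k.transpose(-2, -1) / q.shape[-1] ** 0.5
        # Build the additive bias on self.device so it always matches the
        # activations, even when the module lives on a non-default device.
        bias = torch.zeros(attention_mask.shape, device=self.device, dtype=scores.dtype)
        bias = bias.masked_fill(attention_mask == 0, float("-inf"))
        scores = scores + bias[:, None, :]  # broadcast the key mask over query positions
        return torch.softmax(scores, dim=-1) @ v

# Usage sketch: move the module to a non-default device and the mask follows it.
# attn = ToyAttention().to("cuda:1")
# out = attn(torch.randn(2, 4, 8, device="cuda:1"), torch.ones(2, 4, device="cuda:1"))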