Jackmin801 committed on
Commit
bc43a5e
1 Parent(s): df1a7f6

allow math kernel

Browse files
Files changed (1) hide show
  1. modeling_bert.py +1 -1
modeling_bert.py CHANGED
@@ -378,7 +378,7 @@ class JinaBertSelfAttention(nn.Module):
378
  b, _, s, _ = query_layer.shape
379
  new_bias = attention_mask + bias
380
  attn = scaled_dot_product_attention(query_layer, key_layer, value_layer, new_bias)
381
- attn = attn.permute(0, 2, 1, 3)
382
  return (attn.view(b, s, self.all_head_size),)
383
 
384
  # Take the dot product between "query" and "key" to get the raw attention scores.
 
378
  b, _, s, _ = query_layer.shape
379
  new_bias = attention_mask + bias
380
  attn = scaled_dot_product_attention(query_layer, key_layer, value_layer, new_bias)
381
+ attn = attn.permute(0, 2, 1, 3).contiguous()
382
  return (attn.view(b, s, self.all_head_size),)
383
 
384
  # Take the dot product between "query" and "key" to get the raw attention scores.