damerajee committed on
Commit
7fc5f8a
1 Parent(s): 3792fda

Update modeling_Llamoe.py

Browse files
Files changed (1) hide show
  1. modeling_Llamoe.py +1 -1
modeling_Llamoe.py CHANGED
@@ -646,7 +646,7 @@ class LlamoeFlashAttention2(LlamoeAttention):
646
  )
647
 
648
 
649
- class LlamoeSdpaAttention(GemmoeAttention):
650
  """
651
  Gemmoe attention module using torch.nn.functional.scaled_dot_product_attention. This module inherits from
652
  `GemmoeAttention` as the weights of the module stays untouched. The only changes are on the forward pass to adapt to
 
646
  )
647
 
648
 
649
+ class LlamoeSdpaAttention(LlamoeAttention):
650
  """
651
  Gemmoe attention module using torch.nn.functional.scaled_dot_product_attention. This module inherits from
652
  `GemmoeAttention` as the weights of the module stays untouched. The only changes are on the forward pass to adapt to