Update modeling_Llamoe.py

Files changed (1) hide show

modeling_Llamoe.py CHANGED Viewed

@@ -646,7 +646,7 @@ class LlamoeFlashAttention2(LlamoeAttention):
         )
-class LlamoeSdpaAttention(GemmoeAttention):
     """
     Gemmoe attention module using torch.nn.functional.scaled_dot_product_attention. This module inherits from
     `GemmoeAttention` as the weights of the module stays untouched. The only changes are on the forward pass to adapt to

         )
+class LlamoeSdpaAttention(LlamoeAttention):
     """
     Llamoe attention module using torch.nn.functional.scaled_dot_product_attention. This module inherits from
     `LlamoeAttention` as the weights of the module stay untouched. The only changes are on the forward pass to adapt to