fix training
attention.py  CHANGED  (+3, -0)
```diff
@@ -332,6 +332,7 @@ class MultiheadAttention(nn.Module, Attn):
     key: torch.Tensor,
     value: torch.Tensor,
     n_heads: int,
+    past_key_value,
     softmax_scale: Optional[float],
     attn_bias: Optional[torch.Tensor],
     key_padding_mask: Optional[torch.ByteTensor],
@@ -345,6 +346,7 @@ class MultiheadAttention(nn.Module, Attn):
     key,
     value,
     n_heads,
+    past_key_value,
     softmax_scale,
     attn_bias,
     key_padding_mask,
@@ -361,6 +363,7 @@ class MultiheadAttention(nn.Module, Attn):
     key,
     value,
     self.n_heads,
+    past_key_value,
     self.softmax_scale,
     attn_bias,
     key_padding_mask,
```
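The commit threads a new `past_key_value` argument through the attention signature and both call sites, which is the usual plumbing for a key/value cache during incremental decoding. Below is a minimal, single-head sketch of how such a cache is typically consumed; `attn_with_cache`, its exact signature, and the `(key, value)` tuple layout are illustrative assumptions, not the function defined in attention.py.

```python
import torch
from typing import Optional, Tuple

def attn_with_cache(
    query: torch.Tensor,   # (batch, q_len, d_model), new positions only
    key: torch.Tensor,     # (batch, k_len, d_model), new positions only
    value: torch.Tensor,   # (batch, k_len, d_model)
    past_key_value: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
    softmax_scale: Optional[float] = None,
) -> Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
    """Single-head scaled dot-product attention with a (key, value) cache.

    Hypothetical sketch: the real attention.py is multi-headed and also
    handles attn_bias and key_padding_mask, omitted here for brevity.
    """
    if past_key_value is not None:
        # Prepend cached keys/values so the new queries attend over the
        # whole sequence seen so far, not just the new positions.
        past_key, past_value = past_key_value
        key = torch.cat([past_key, key], dim=1)
        value = torch.cat([past_value, value], dim=1)
    present = (key, value)  # updated cache, handed back to the caller

    scale = softmax_scale if softmax_scale is not None else query.size(-1) ** -0.5
    scores = torch.matmul(query, key.transpose(-2, -1)) * scale
    out = torch.matmul(torch.softmax(scores, dim=-1), value)
    return out, present
```

On each decoding step the caller feeds the returned `present` tuple back in as `past_key_value`, so keys and values for earlier tokens are computed once rather than on every step.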