Qwen
/

Qwen-7B-Chat-Int4

Text Generation

4-bit precision

Model card | Files and versions | Community

yangapku committed on Sep 26, 2023

Commit

fcc99d6

•

1 Parent(s): f4b568f

update modeling_qwen.py

Files changed (2) hide show

assets/wechat.png +0 -0
modeling_qwen.py +2 -1

assets/wechat.png CHANGED Viewed

modeling_qwen.py CHANGED Viewed

@@ -193,9 +193,10 @@ class FlashSelfAttention(torch.nn.Module):
         if attention_mask is not None:
             k, indices_k, cu_seqlens_k, seqlen_k = self.unpad_input(k, attention_mask)
             v = v[indices_k]
-            if seqlen_q == seqlen_k:
                 q = q[indices_k]
                 cu_seqlens_q = cu_seqlens_k
         else:
             cu_seqlens_k = torch.arange(
                 0,

         if attention_mask is not None:
             k, indices_k, cu_seqlens_k, seqlen_k = self.unpad_input(k, attention_mask)
             v = v[indices_k]
+            if self.training or q.size(0) == k.size(0):
                 q = q[indices_k]
                 cu_seqlens_q = cu_seqlens_k
+                seqlen_q = seqlen_k
         else:
             cu_seqlens_k = torch.arange(
                 0,