vaibhavad committed on
Commit
5069f5e
1 Parent(s): 48b3d3b

Update attn_mask_utils.py

Files changed (1)
  1. attn_mask_utils.py +3 -2
attn_mask_utils.py CHANGED

@@ -175,8 +175,9 @@ def _prepare_4d_causal_attention_mask_for_sdpa(
     if query_length == 1:
         # For query_length == 1, causal attention and bi-directional attention are the same.
         attention_mask = None
-    elif key_value_length == query_length:
-        attention_mask = None
+    # Commented out to deal with batch size=1 cases
+    # elif key_value_length == query_length:
+    #     attention_mask = None
     else:
         # Unfortunately, for query_length > 1 and key_value_length != query_length, we cannot generally ignore the attention mask, as SDPA causal mask generation
         # may be wrong. We will set `is_causal=False` in SDPA and rely on Transformers attention_mask instead, hence not setting it to None here.
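For context, a minimal sketch of what the removed branch changed. This is plain PyTorch, not the repository's code, and the shapes are hypothetical: it only illustrates the point made in the surviving comments, namely that passing attention_mask=None (letting SDPA generate its own causal mask) is not equivalent to passing an explicit bi-directional mask, even when key_value_length == query_length.

import torch
import torch.nn.functional as F

# Hypothetical shapes for illustration only.
batch, heads, seq_len, head_dim = 1, 4, 5, 8
q = torch.randn(batch, heads, seq_len, head_dim)
k = torch.randn(batch, heads, seq_len, head_dim)
v = torch.randn(batch, heads, seq_len, head_dim)

# attention_mask = None path: SDPA builds its own causal mask internally.
out_none = F.scaled_dot_product_attention(q, k, v, attn_mask=None, is_causal=True)

# Explicit-mask path: an additive all-zeros mask means "attend everywhere",
# i.e. bi-directional attention, with is_causal=False.
bidir_mask = torch.zeros(batch, 1, seq_len, seq_len)
out_mask = F.scaled_dot_product_attention(q, k, v, attn_mask=bidir_mask, is_causal=False)

# For query_length > 1 the two disagree, so the mask cannot simply be
# dropped for a bi-directional model, even in the unpadded batch-size-1
# case where key_value_length == query_length.
print(torch.allclose(out_none, out_mask))  # False in general

With the elif branch commented out, such inputs now fall through to the else branch, which keeps the explicit attention_mask and runs SDPA with is_causal=False, matching the behavior described in the final two comment lines of the hunk.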