Sentence Similarity
Transformers
Safetensors
English
llama
feature-extraction
text-embedding
embeddings
information-retrieval
beir
text-classification
language-model
text-clustering
text-semantic-similarity
text-evaluation
text-reranking
natural_questions
ms_marco
fever
hotpot_qa
mteb
custom_code
text-generation-inference
Inference Endpoints
Update attn_mask_utils.py
attn_mask_utils.py CHANGED (+3 -2)
@@ -38,8 +38,9 @@ def _prepare_4d_attention_mask_for_sdpa(
     elif query_length == 1:
         # For query_length == 1, causal attention and bi-directional attention are the same.
         attention_mask = None
-    elif key_value_length == query_length:
-        attention_mask = None
+    # Commented out to deal with batch size=1 cases
+    # elif key_value_length == query_length:
+    #     attention_mask = None
     else:
         # Unfortunately, for query_length > 1 and key_value_length != query_length, we cannot generally ignore the attention mask, as SDPA causal mask generation
         # may be wrong. We will set `is_causal=False` in SDPA and rely on Transformers attention_mask instead, hence not setting it to None here.
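For context, the effect of this change is that when key_value_length == query_length (e.g. a single unpadded sequence at batch size 1), the helper no longer short-circuits to attention_mask = None and instead falls through to building an explicit 4D mask for SDPA. The sketch below illustrates the standard additive-mask expansion such a path typically produces; the helper name and shapes are illustrative assumptions, not this repository's exact code.

```python
import torch

def expand_padding_mask_to_4d(mask: torch.Tensor, dtype: torch.dtype) -> torch.Tensor:
    # Hypothetical sketch (not this repo's exact implementation): expand a
    # [batch, key_value_length] padding mask (1 = attend, 0 = padding) into the
    # additive [batch, 1, query_length, key_value_length] mask that SDPA consumes.
    batch_size, key_value_length = mask.shape
    query_length = key_value_length  # the case this commit no longer maps to None
    expanded = mask[:, None, None, :].expand(batch_size, 1, query_length, key_value_length).to(dtype)
    inverted = 1.0 - expanded
    # Padded positions get a large negative bias so softmax assigns them ~zero attention weight.
    return inverted.masked_fill(inverted.to(torch.bool), torch.finfo(dtype).min)

# With the `elif key_value_length == query_length` branch commented out, even an all-ones mask
# (a single unpadded sequence, batch size 1) is expanded rather than replaced by None, so SDPA
# always receives an explicit attn_mask tensor in the bidirectional case.
mask = torch.ones(1, 8, dtype=torch.long)
print(expand_padding_mask_to_4d(mask, torch.float32).shape)  # torch.Size([1, 1, 8, 8])
```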