Mizukiluke
committed on
Commit
•
4071f0f
1
Parent(s):
8cabe68
for ms-swift
Browse files
- modeling_hyper_qwen2.py +1 -2
modeling_hyper_qwen2.py
CHANGED
@@ -464,8 +464,7 @@ class HyperQwen2SdpaAttention(HyperQwen2Attention):
|
|
464 |
curr_query_layer = query_states[bi:bi+1]
|
465 |
curr_key_layer = key_states[bi:bi+1]
|
466 |
curr_value_layer = value_states[bi:bi+1]
|
467 |
-
|
468 |
-
is_causal = True if causal_mask is None and q_len > 1 else False
|
469 |
if is_causal:
|
470 |
full_mask = None
|
471 |
else:
|
|
|
464 |
curr_query_layer = query_states[bi:bi+1]
|
465 |
curr_key_layer = key_states[bi:bi+1]
|
466 |
curr_value_layer = value_states[bi:bi+1]
|
467 |
+
is_causal = True if q_len > 1 else False
|
|
|
468 |
if is_causal:
|
469 |
full_mask = None
|
470 |
else:
|