Szymon Tworkowski
committed on
Commit
•
1a5fcb5
1
Parent(s):
b65129a
remove warning
Browse files - modeling_longllama.py +0 -3
modeling_longllama.py
CHANGED
@@ -1027,9 +1027,6 @@ def _handle_long_input(
|
|
1027 |
attn_length += past_key_values[0][0].shape[-2]
|
1028 |
attention_mask = attention_mask[..., -attn_length:] if attention_mask is not None else None
|
1029 |
|
1030 |
-
if past_key_values is not None and past_key_values[0][0].shape[-2] + remaining_input_length > context_window_length:
|
1031 |
-
logger.warning("Currently, the code is not optimized for generating long outputs. "
|
1032 |
-
"You see this warning as parts of the local (generation) cache are going to be moved to the memory cache.")
|
1033 |
outputs = model(
|
1034 |
input_ids=input_ids[..., beg:] if input_ids is not None else None,
|
1035 |
attention_mask=attention_mask,
|
|
|
1027 |
attn_length += past_key_values[0][0].shape[-2]
|
1028 |
attention_mask = attention_mask[..., -attn_length:] if attention_mask is not None else None
|
1029 |
|
|
|
|
|
|
|
1030 |
outputs = model(
|
1031 |
input_ids=input_ids[..., beg:] if input_ids is not None else None,
|
1032 |
attention_mask=attention_mask,
|