"""
LoLCATs attention combining sliding window and linear attentions
- Using standard sliding window arrangement
- Training over long sequences with fixed memory with recurrent view
- During attention transfer, use Flash Attention to compute softmax attention outputs
For each layer:
- We first compute (softmax) attention over sliding windows
- We then compute standard linear attention to "fill in" the earlier parts
- We combine to model the entire sequence
"""
from .linear_window_attention_tk_long import LolcatsTKWindowLongAttention
from .linear_window_attention_sw import hybrid_attention_quadratic
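import torch
import torch.nn.functional as F


# ---------------------------------------------------------------------------
# Illustrative reference (a minimal sketch, not part of the repo's API): a
# quadratic, single-head version of the per-layer hybrid attention described
# in the module docstring. Assumptions: q, k, v are (seq_len, head_dim)
# tensors, the feature map is 1 + ELU (a stand-in for the learned feature
# map), and the two terms are mixed uniformly; the imported
# `hybrid_attention_quadratic` handles batched multi-head tensors and its
# exact feature map and term weighting may differ.
# ---------------------------------------------------------------------------
def _hybrid_attention_reference(q: torch.Tensor,
                                k: torch.Tensor,
                                v: torch.Tensor,
                                window_size: int = 64) -> torch.Tensor:
    """Windowed softmax attention plus linear attention over the prefix."""
    seq_len = q.shape[0]
    pos = torch.arange(seq_len)
    causal = pos[None, :] <= pos[:, None]  # query i attends to positions j <= i
    in_window = causal & (pos[:, None] - pos[None, :] < window_size)

    # (1) Softmax attention restricted to the causal sliding window.
    scores = (q @ k.T) * q.shape[-1] ** -0.5
    scores = scores.masked_fill(~in_window, float('-inf'))
    a_win = torch.exp(scores - scores.amax(dim=-1, keepdim=True))

    # (2) Linear attention "fills in" causal positions before the window.
    phi_q, phi_k = 1 + F.elu(q), 1 + F.elu(k)
    a_lin = (phi_q @ phi_k.T).masked_fill(~(causal & ~in_window), 0.0)

    # (3) Combine both terms and normalize jointly over the full sequence.
    a = a_win + a_lin
    return (a / a.sum(dim=-1, keepdim=True)) @ v


# Example: q = k = v = torch.randn(128, 64); _hybrid_attention_reference(q, k, v, window_size=16)
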
class LolcatsSlidingWindowLongAttention(LolcatsTKWindowLongAttention):
    """
    LoLCATs attention combining sliding window and linear attention
    """
    def __init__(self, remove_base_attn: bool = True, **kwargs):
        # Pass remove_base_attn through (the original hardcoded it, silently
        # ignoring the argument); set it to False to keep self.base_attn
        # around for Flash Attention inference during attention transfer.
        super().__init__(remove_base_attn=remove_base_attn, **kwargs)
        self.quadratic_attention = hybrid_attention_quadratic
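
# ---------------------------------------------------------------------------
# Illustrative reference (a minimal sketch, not the repo's API): the "fixed
# memory, recurrent view" used to train over long sequences. Causal linear
# attention can be computed chunk by chunk while carrying only two running
# summaries, so memory stays constant in sequence length. The class above
# additionally applies windowed softmax attention inside each chunk; this
# sketch shows only the linear-attention recurrence, again with a 1 + ELU
# feature map standing in for the learned one. The function name and
# signature are hypothetical.
# ---------------------------------------------------------------------------
def _linear_attention_recurrent(q_chunks, k_chunks, v_chunks, eps: float = 1e-6):
    """Causal linear attention over (chunk_len, head_dim) chunks, O(1) state."""
    def feature_map(x):
        # Positive features so the normalizer below stays positive.
        return 1 + F.elu(x)

    head_dim = q_chunks[0].shape[-1]
    kv_state = torch.zeros(head_dim, head_dim)  # running sum of phi(k) v^T
    k_state = torch.zeros(head_dim)             # running sum of phi(k)
    outputs = []
    for q, k, v in zip(q_chunks, k_chunks, v_chunks):
        phi_q, phi_k = feature_map(q), feature_map(k)
        # Causal (cumulative) sums inside the chunk, seeded by the carried state.
        kv_cum = kv_state + torch.cumsum(torch.einsum('nd,ne->nde', phi_k, v), dim=0)
        k_cum = k_state + torch.cumsum(phi_k, dim=0)
        num = torch.einsum('nd,nde->ne', phi_q, kv_cum)
        den = torch.einsum('nd,nd->n', phi_q, k_cum).clamp(min=eps)
        outputs.append(num / den[:, None])
        # Carry fixed-size summaries forward; past chunks are never revisited.
        kv_state, k_state = kv_cum[-1], k_cum[-1]
    return torch.cat(outputs, dim=0)


# Example: x = torch.randn(4, 32, 16); _linear_attention_recurrent(list(x), list(x), list(x))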