Error with long text #4
by hoan - opened
Hi,
I tried this model and it fails on very long text, probably due to a problem in the NeoBERT code.
A quick workaround is to set model.max_seq_length = 4096.
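A minimal sketch of that workaround, assuming the model is loaded through sentence-transformers (the loading path and the trust_remote_code flag are my assumptions; only the max_seq_length attribute comes from the report above):

```python
from sentence_transformers import SentenceTransformer

# Assumed loading path: NeoBERT ships custom modelling code, hence trust_remote_code=True.
model = SentenceTransformer("chandar-lab/NeoBERT", trust_remote_code=True)

# Workaround from the report: cap inputs at 4096 tokens so over-long documents are truncated.
model.max_seq_length = 4096

embeddings = model.encode(["... a very long document ..."])
print(embeddings.shape)
```

For reference, the traceback without the workaround: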
File ~/.cache/huggingface/modules/transformers_modules/chandar-lab/NeoBERT/a4fbc49a61db10ff2db66140ae59c09d96c027f9/model.py:271, in NeoBERT.forward(self, input_ids, position_ids, max_seqlen, cu_seqlens, attention_mask, output_hidden_states, output_attentions, **kwargs)
269 # Transformer encoder
270 for layer in self.transformer_encoder:
--> 271 x, attn = layer(x, attention_mask, freqs_cis, output_attentions, max_seqlen, cu_seqlens)
272 if output_hidden_states:
273 hidden_states.append(x)
File ~/miniconda3/envs/transformers/lib/python3.11/site-packages/torch/nn/modules/module.py:1739, in Module._wrapped_call_impl(self, *args, **kwargs)
1737 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1738 else:
-> 1739 return self._call_impl(*args, **kwargs)
File ~/miniconda3/envs/transformers/lib/python3.11/site-packages/torch/nn/modules/module.py:1750, in Module._call_impl(self, *args, **kwargs)
1745 # If we don't have any hooks, we want to skip the rest of the logic in
1746 # this function, and just call forward.
1747 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1748 or _global_backward_pre_hooks or _global_backward_hooks
1749 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1750 return forward_call(*args, **kwargs)
1752 result = None
1753 called_always_called_hooks = set()
File ~/.cache/huggingface/modules/transformers_modules/chandar-lab/NeoBERT/a4fbc49a61db10ff2db66140ae59c09d96c027f9/model.py:136, in EncoderBlock.forward(self, x, attention_mask, freqs_cis, output_attentions, max_seqlen, cu_seqlens)
126 def forward(
127 self,
128 x: torch.Tensor,
(...)
134 ):
135 # Attention
--> 136 attn_output, attn_weights = self._att_block(
137 self.attention_norm(x), attention_mask, freqs_cis, output_attentions, max_seqlen, cu_seqlens
138 )
140 # Residual
141 x = x + attn_output
File ~/.cache/huggingface/modules/transformers_modules/chandar-lab/NeoBERT/a4fbc49a61db10ff2db66140ae59c09d96c027f9/model.py:161, in EncoderBlock._att_block(self, x, attention_mask, freqs_cis, output_attentions, max_seqlen, cu_seqlens)
157 batch_size, seq_len, _ = x.shape
159 xq, xk, xv = self.qkv(x).view(batch_size, seq_len, self.config.num_attention_heads, self.config.dim_head * 3).chunk(3, axis=-1)
--> 161 xq, xk = apply_rotary_emb(xq, xk, freqs_cis)
163 # Attn block
164 attn_weights = None
File ~/.cache/huggingface/modules/transformers_modules/chandar-lab/NeoBERT/a4fbc49a61db10ff2db66140ae59c09d96c027f9/rotary.py:58, in apply_rotary_emb(xq, xk, freqs_cis)
56 xq_ = torch.view_as_complex(xq.float().reshape(*xq.shape[:-1], -1, 2))
57 xk_ = torch.view_as_complex(xk.float().reshape(*xk.shape[:-1], -1, 2))
---> 58 freqs_cis = reshape_for_broadcast(freqs_cis, xq_)
59 xq_out = torch.view_as_real(xq_ * freqs_cis).flatten(3)
60 xk_out = torch.view_as_real(xk_ * freqs_cis).flatten(3)
File ~/.cache/huggingface/modules/transformers_modules/chandar-lab/NeoBERT/a4fbc49a61db10ff2db66140ae59c09d96c027f9/rotary.py:31, in reshape_for_broadcast(freqs_cis, x)
30 def reshape_for_broadcast(freqs_cis: torch.Tensor, x: torch.Tensor):
---> 31 assert freqs_cis.shape[1:] == (x.shape[1], x.shape[-1])
32 return freqs_cis.contiguous().unsqueeze(2)
AssertionError:
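The assertion at rotary.py line 31 compares freqs_cis.shape[1:] against (x.shape[1], x.shape[-1]), so if freqs_cis covers fewer positions than the input sequence length the check fails with an empty message, which matches the error above. A toy reproduction under assumed shapes (the 4096-entry table, head count, and head size below are illustrative guesses, not NeoBERT's actual internals):

```python
import torch

# reshape_for_broadcast copied from the traceback (rotary.py, lines 30-32).
def reshape_for_broadcast(freqs_cis: torch.Tensor, x: torch.Tensor):
    assert freqs_cis.shape[1:] == (x.shape[1], x.shape[-1])
    return freqs_cis.contiguous().unsqueeze(2)

# Illustrative shapes only: a rotary table covering 4096 positions vs. a 5000-token input.
batch, n_heads, head_half = 1, 12, 32   # head_half = dim_head // 2 after view_as_complex
table_len, seq_len = 4096, 5000
freqs_cis = torch.zeros(1, table_len, head_half, dtype=torch.complex64)
xq_ = torch.zeros(batch, seq_len, n_heads, head_half, dtype=torch.complex64)

reshape_for_broadcast(freqs_cis, xq_)   # raises AssertionError, as in the traceback
```

Capping max_seq_length at 4096 keeps x.shape[1] within the table, which is presumably why the workaround above avoids the crash.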