Runtime error

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/user/app/openlrm/models/encoders/dinov2/layers/attention.py", line 84, in forward
    x = memory_efficient_attention(q, k, v, attn_bias=attn_bias)
  File "/usr/local/lib/python3.10/site-packages/xformers/ops/fmha/__init__.py", line 276, in memory_efficient_attention
    return _memory_efficient_attention(
  File "/usr/local/lib/python3.10/site-packages/xformers/ops/fmha/__init__.py", line 395, in _memory_efficient_attention
    return _memory_efficient_attention_forward(
  File "/usr/local/lib/python3.10/site-packages/xformers/ops/fmha/__init__.py", line 414, in _memory_efficient_attention_forward
    op = _dispatch_fw(inp, False)
  File "/usr/local/lib/python3.10/site-packages/xformers/ops/fmha/dispatch.py", line 119, in _dispatch_fw
    return _run_priority_list(
  File "/usr/local/lib/python3.10/site-packages/xformers/ops/fmha/dispatch.py", line 55, in _run_priority_list
    raise NotImplementedError(msg)
NotImplementedError: No operator found for `memory_efficient_attention_forward` with inputs:
     query     : shape=(1, 581, 12, 64) (torch.float32)
     key       : shape=(1, 581, 12, 64) (torch.float32)
     value     : shape=(1, 581, 12, 64) (torch.float32)
     attn_bias : <class 'NoneType'>
     p         : 0.0
`decoderF` is not supported because:
    device=cpu (supported: {'cuda'})
    attn_bias type is <class 'NoneType'>
`flshattF@v2.5.7` is not supported because:
    device=cpu (supported: {'cuda'})
    dtype=torch.float32 (supported: {torch.float16, torch.bfloat16})
`cutlassF` is not supported because:
    device=cpu (supported: {'cuda'})
`smallkF` is not supported because:
    max(query.shape[-1] != value.shape[-1]) > 32
    device=cpu (supported: {'cuda'})
    unsupported embed per head: 64
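The dispatcher message itself explains the failure: every candidate xformers backend (`decoderF`, `flshattF`, `cutlassF`, `smallkF`) requires device=cuda, and the tensors here are float32 on CPU, so no operator qualifies. A minimal sketch of a CPU fallback, assuming you can edit the attention layer: route to PyTorch's built-in torch.nn.functional.scaled_dot_product_attention when xformers cannot run. The attention_with_fallback name and XFORMERS_AVAILABLE flag below are illustrative, not OpenLRM's actual code.

```python
# Hypothetical workaround sketch, not the actual OpenLRM code: fall back to
# PyTorch's native attention when xformers cannot run (every backend listed
# in the error requires CUDA, and flash attention also requires fp16/bf16).
import torch.nn.functional as F

try:
    from xformers.ops import memory_efficient_attention
    XFORMERS_AVAILABLE = True  # assumed flag name, for illustration only
except ImportError:
    XFORMERS_AVAILABLE = False

def attention_with_fallback(q, k, v, attn_bias=None):
    # q, k, v arrive as (batch, seq_len, num_heads, head_dim),
    # matching the shapes (1, 581, 12, 64) in the traceback.
    if XFORMERS_AVAILABLE and q.is_cuda:
        return memory_efficient_attention(q, k, v, attn_bias=attn_bias)
    # scaled_dot_product_attention expects (batch, num_heads, seq_len, head_dim).
    # attn_bias is None in the failing call; a non-None xformers bias object
    # would need converting to a plain tensor mask first.
    q, k, v = (t.transpose(1, 2) for t in (q, k, v))
    out = F.scaled_dot_product_attention(q, k, v, attn_mask=attn_bias)
    return out.transpose(1, 2)
```

Alternatively, running the Space on GPU hardware avoids the CPU dispatch path entirely, since the CUDA backends would then be eligible.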
