Inference Failure

#2
by evilperson068 - opened
    layer_outputs = decoder_layer(
  File "/opt/miniconda3/envs/torch2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1505, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/opt/miniconda3/envs/torch2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1514, in _call_impl
    return forward_call(*args, **kwargs)
  File "/opt/miniconda3/envs/torch2/lib/python3.10/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = old_forward(*args, **kwargs)
  File "/opt/miniconda3/envs/torch2/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 408, in forward
    hidden_states, self_attn_weights, present_key_value = self.self_attn(
  File "/opt/miniconda3/envs/torch2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1505, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/opt/miniconda3/envs/torch2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1514, in _call_impl
    return forward_call(*args, **kwargs)
  File "/opt/miniconda3/envs/torch2/lib/python3.10/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = old_forward(*args, **kwargs)
  File "/opt/miniconda3/envs/torch2/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 295, in forward
    query_states = [F.linear(hidden_states, query_slices[i]) for i in range(self.pretraining_tp)]
  File "/opt/miniconda3/envs/torch2/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 295, in <listcomp>
    query_states = [F.linear(hidden_states, query_slices[i]) for i in range(self.pretraining_tp)]
RuntimeError: mat1 and mat2 shapes cannot be multiplied (25x5120 and 1x2560)

Can anyone please help with this issue? Thanks.

Sign up or log in to comment