RuntimeError: shape '[1, 5, 32, 64]' is invalid for input of size 2560
I am still exploring models here, and I am trying the basic code given in the model card. I installed everything without problems, and I also upgraded using the --upgrade option. However, when I run the code, I get the error below. Has anyone else encountered this as well?
RuntimeError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_23844\1570334889.py in <module>
11 )
12
---> 13 pipe("The key to life is")
C:\ProgramData\Anaconda3\lib\site-packages\transformers\pipelines\text_generation.py in __call__(self, text_inputs, **kwargs)
199 ids of the generated text.
200 """
--> 201 return super().__call__(text_inputs, **kwargs)
202
203 def preprocess(self, prompt_text, prefix="", handle_long_generation=None, **generate_kwargs):
C:\ProgramData\Anaconda3\lib\site-packages\transformers\pipelines\base.py in __call__(self, inputs, num_workers, batch_size, *args, **kwargs)
1118 )
1119 else:
-> 1120 return self.run_single(inputs, preprocess_params, forward_params, postprocess_params)
1121
1122 def run_multi(self, inputs, preprocess_params, forward_params, postprocess_params):
C:\ProgramData\Anaconda3\lib\site-packages\transformers\pipelines\base.py in run_single(self, inputs, preprocess_params, forward_params, postprocess_params)
1125 def run_single(self, inputs, preprocess_params, forward_params, postprocess_params):
1126 model_inputs = self.preprocess(inputs, **preprocess_params)
-> 1127 model_outputs = self.forward(model_inputs, **forward_params)
1128 outputs = self.postprocess(model_outputs, **postprocess_params)
1129 return outputs
C:\ProgramData\Anaconda3\lib\site-packages\transformers\pipelines\base.py in forward(self, model_inputs, **forward_params)
1024 with inference_context():
1025 model_inputs = self._ensure_tensor_on_device(model_inputs, device=self.device)
-> 1026 model_outputs = self._forward(model_inputs, **forward_params)
1027 model_outputs = self._ensure_tensor_on_device(model_outputs, device=torch.device("cpu"))
1028 else:
C:\ProgramData\Anaconda3\lib\site-packages\transformers\pipelines\text_generation.py in _forward(self, model_inputs, **generate_kwargs)
261
262 # BS x SL
--> 263 generated_sequence = self.model.generate(input_ids=input_ids, attention_mask=attention_mask, **generate_kwargs)
264 out_b = generated_sequence.shape[0]
265 if self.framework == "pt":
C:\ProgramData\Anaconda3\lib\site-packages\torch\autograd\grad_mode.py in decorate_context(*args, **kwargs)
25 def decorate_context(*args, **kwargs):
26 with self.clone():
---> 27 return func(*args, **kwargs)
28 return cast(F, decorate_context)
29
C:\ProgramData\Anaconda3\lib\site-packages\transformers\generation\utils.py in generate(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, **kwargs)
1530 synced_gpus=synced_gpus,
1531 streamer=streamer,
-> 1532 **model_kwargs,
1533 )
1534
C:\ProgramData\Anaconda3\lib\site-packages\transformers\generation\utils.py in greedy_search(self, input_ids, logits_processor, stopping_criteria, max_length, pad_token_id, eos_token_id, output_attentions, output_hidden_states, output_scores, return_dict_in_generate, synced_gpus, streamer, **model_kwargs)
2341 return_dict=True,
2342 output_attentions=output_attentions,
-> 2343 output_hidden_states=output_hidden_states,
2344 )
2345
C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
C:\ProgramData\Anaconda3\lib\site-packages\accelerate\hooks.py in new_forward(*args, **kwargs)
163 output = old_forward(*args, **kwargs)
164 else:
--> 165 output = old_forward(*args, **kwargs)
166 return module._hf_hook.post_forward(module, output)
167
C:\ProgramData\Anaconda3\lib\site-packages\transformers\models\llama\modeling_llama.py in forward(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict)
695 output_attentions=output_attentions,
696 output_hidden_states=output_hidden_states,
--> 697 return_dict=return_dict,
698 )
699
C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
C:\ProgramData\Anaconda3\lib\site-packages\accelerate\hooks.py in new_forward(*args, **kwargs)
163 output = old_forward(*args, **kwargs)
164 else:
--> 165 output = old_forward(*args, **kwargs)
166 return module._hf_hook.post_forward(module, output)
167
C:\ProgramData\Anaconda3\lib\site-packages\transformers\models\llama\modeling_llama.py in forward(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, use_cache, output_attentions, output_hidden_states, return_dict)
582 past_key_value=past_key_value,
583 output_attentions=output_attentions,
--> 584 use_cache=use_cache,
585 )
586
C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
C:\ProgramData\Anaconda3\lib\site-packages\accelerate\hooks.py in new_forward(*args, **kwargs)
163 output = old_forward(*args, **kwargs)
164 else:
--> 165 output = old_forward(*args, **kwargs)
166 return module._hf_hook.post_forward(module, output)
167
C:\ProgramData\Anaconda3\lib\site-packages\transformers\models\llama\modeling_llama.py in forward(self, hidden_states, attention_mask, position_ids, past_key_value, output_attentions, use_cache)
296 past_key_value=past_key_value,
297 output_attentions=output_attentions,
--> 298 use_cache=use_cache,
299 )
300 hidden_states = residual + hidden_states
C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
C:\ProgramData\Anaconda3\lib\site-packages\accelerate\hooks.py in new_forward(*args, **kwargs)
163 output = old_forward(*args, **kwargs)
164 else:
--> 165 output = old_forward(*args, **kwargs)
166 return module._hf_hook.post_forward(module, output)
167
C:\ProgramData\Anaconda3\lib\site-packages\transformers\models\llama\modeling_llama.py in forward(self, hidden_states, attention_mask, position_ids, past_key_value, output_attentions, use_cache)
193
194 query_states = self.q_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
--> 195 key_states = self.k_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
196 value_states = self.v_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
197
RuntimeError: shape '[1, 5, 32, 64]' is invalid for input of size 2560
I got the same error. Have you made any progress on this?