Error running the model

#1
by voxxer - opened

When I try to run the model using the provided example code, the generate call fails with the error below.
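For reference, here is a minimal loading-and-generation sketch of what I'm running. The repo name matches the cached module paths in the traceback; the loader arguments and the prompt text are assumptions based on typical AutoGPTQ usage, not a verbatim copy of the card:

```python
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM

# Repo name taken from the cached module paths in the traceback.
model_name_or_path = "TheBloke/h2ogpt-gm-oasst1-en-2048-falcon-7b-v3-GPTQ"

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
model = AutoGPTQForCausalLM.from_quantized(
    model_name_or_path,
    use_safetensors=True,
    trust_remote_code=True,  # the repo ships its own modelling_RW.py
    device="cuda:0",
)

# Hypothetical prompt; the failure happens regardless of the prompt text.
prompt_template = "<|prompt|>Tell me about falcons.<|endoftext|><|answer|>"
input_ids = tokenizer(prompt_template, return_tensors="pt").input_ids.cuda()
output = model.generate(inputs=input_ids, temperature=0.7, max_new_tokens=512)
print(tokenizer.decode(output[0]))
```

The full traceback: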
```
---------------------------------------------------------------------------

RuntimeError Traceback (most recent call last)

<ipython-input> in <module>()
1 input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
----> 2 output = model.generate(inputs=input_ids, temperature=0.7, max_new_tokens=512)
3 print(tokenizer.decode(output[0]))

/usr/local/lib/python3.10/dist-packages/auto_gptq/modeling/_base.py in generate(self, **kwargs)
421 """shortcut for model.generate"""
422 with torch.inference_mode(), torch.amp.autocast(device_type=self.device.type):
--> 423 return self.model.generate(**kwargs)
424
425 def prepare_inputs_for_generation(self, *args, **kwargs):

/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py in decorate_context(*args, **kwargs)
113 def decorate_context(*args, **kwargs):
114 with ctx_factory():
--> 115 return func(*args, **kwargs)
116
117 return decorate_context

/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py in generate(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, **kwargs)
1520
1521 # 11. run greedy search
-> 1522 return self.greedy_search(
1523 input_ids,
1524 logits_processor=logits_processor,

/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py in greedy_search(self, input_ids, logits_processor, stopping_criteria, max_length, pad_token_id, eos_token_id, output_attentions, output_hidden_states, output_scores, return_dict_in_generate, synced_gpus, streamer, **model_kwargs)
2337
2338 # forward pass to get next token
-> 2339 outputs = self(
2340 **model_inputs,
2341 return_dict=True,

/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _call_impl(self, *args, **kwargs)
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []

~/.cache/huggingface/modules/transformers_modules/TheBloke/h2ogpt-gm-oasst1-en-2048-falcon-7b-v3-GPTQ/bb6a3a0b5a5a6b809877daa8397603a4467ac90d/modelling_RW.py in forward(self, input_ids, past_key_values, attention_mask, head_mask, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict, **deprecated_arguments)
751 return_dict = return_dict if return_dict is not None else self.config.use_return_dict
752
--> 753 transformer_outputs = self.transformer(
754 input_ids,
755 past_key_values=past_key_values,

/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _call_impl(self, *args, **kwargs)
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []

~/.cache/huggingface/modules/transformers_modules/TheBloke/h2ogpt-gm-oasst1-en-2048-falcon-7b-v3-GPTQ/bb6a3a0b5a5a6b809877daa8397603a4467ac90d/modelling_RW.py in forward(self, input_ids, past_key_values, attention_mask, head_mask, inputs_embeds, use_cache, output_attentions, output_hidden_states, return_dict, **deprecated_arguments)
646 )
647 else:
--> 648 outputs = block(
649 hidden_states,
650 layer_past=layer_past,

/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _call_impl(self, *args, **kwargs)
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []

~/.cache/huggingface/modules/transformers_modules/TheBloke/h2ogpt-gm-oasst1-en-2048-falcon-7b-v3-GPTQ/bb6a3a0b5a5a6b809877daa8397603a4467ac90d/modelling_RW.py in forward(self, hidden_states, alibi, attention_mask, layer_past, head_mask, use_cache, output_attentions)
383
384 # Self attention.
--> 385 attn_outputs = self.self_attention(
386 layernorm_output,
387 layer_past=layer_past,

/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _call_impl(self, *args, **kwargs)
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []

~/.cache/huggingface/modules/transformers_modules/TheBloke/h2ogpt-gm-oasst1-en-2048-falcon-7b-v3-GPTQ/bb6a3a0b5a5a6b809877daa8397603a4467ac90d/modelling_RW.py in forward(self, hidden_states, alibi, attention_mask, layer_past, head_mask, use_cache, output_attentions)
240 output_attentions: bool = False,
241 ):
--> 242 fused_qkv = self.query_key_value(hidden_states) # [batch_size, seq_length, 3 x hidden_size]
243
244 # 3 x [batch_size, seq_length, num_heads, head_dim]

/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _call_impl(self, *args, **kwargs)
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []

/usr/local/lib/python3.10/dist-packages/auto_gptq/nn_modules/qlinear_old.py in forward(self, x)
219 weight = torch.bitwise_right_shift(torch.unsqueeze(self.qweight, 1).expand(-1, 32 // self.bits, -1), self.wf.unsqueeze(-1)).to(torch.int16 if self.bits == 8 else torch.int8)
220 torch.bitwise_and(weight,(2 ** self.bits) - 1, out=weight)
--> 221 weight = weight.reshape(-1, self.group_size, weight.shape[2])
222 elif self.bits == 3:
223 zeros = self.qzeros.reshape(self.qzeros.shape[0], self.qzeros.shape[1]//3, 3, 1).expand(-1, -1, -1, 12)

RuntimeError: shape '[-1, 128, 4672]' is invalid for input of size 21229568
```
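The sizes in the error message look like a group-size mismatch in the dequantization step. The reshape at qlinear_old.py line 221 needs the unpacked weight's row count to be divisible by `group_size` (128 here), and a quick check using only the numbers printed above shows it is not:

```python
# All numbers are taken from the error message above.
numel = 21229568          # total elements in the unpacked weight
out_features = 4672       # last dim of the fused QKV weight (weight.shape[2])
in_features = numel // out_features

print(in_features)        # 4544
print(in_features % 128)  # 64 -> 4544 is NOT divisible by group_size=128
print(in_features % 64)   # 0  -> but it IS divisible by 64
```

If that reading is right, the checkpoint was quantized with a group size of 64 while the loader is assuming 128 (for example from a missing or stale quantize_config.json). Purely as a guess, forcing the group size at load time might be worth trying:

```python
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig

# Assumption: the weights were quantized with group_size=64, not 128.
quantize_config = BaseQuantizeConfig(bits=4, group_size=64, desc_act=False)

model = AutoGPTQForCausalLM.from_quantized(
    "TheBloke/h2ogpt-gm-oasst1-en-2048-falcon-7b-v3-GPTQ",
    quantize_config=quantize_config,
    use_safetensors=True,
    trust_remote_code=True,
    device="cuda:0",
)
```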
