ValueError: Input length of input_ids is 48, but `max_length` is set to 20.

#1
by HengfengWei - opened

With the code from the model card, I run into the following error:

ValueError: Input length of input_ids is 48, but `max_length` is set to 20. This can lead to unexpected behavior. You should consider increasing `max_length` or, better yet, setting `max_new_tokens`.

The full traceback is as follows:

Cell In[3], line 11
      8 image = Image.open(requests.get(url, stream=True).raw)
      9 prompt = "Assistant helps to write down the TikZ code for the user's image. USER: <image>\nWrite down the TikZ code to draw the diagram shown in the image. ASSISTANT: "
---> 11 print(pipe(image, prompt=prompt)[0]['generated_text'])

File D:\learning-ml\llm\tikz\.venv\Lib\site-packages\transformers\pipelines\image_to_text.py:125, in ImageToTextPipeline.__call__(self, images, **kwargs)
     97 def __call__(self, images: Union[str, List[str], "Image.Image", List["Image.Image"]], **kwargs):
     98     """
     99     Assign labels to the image(s) passed as inputs.
    100 
   (...)
    123         - **generated_text** (`str`) -- The generated text.
    124     """
--> 125     return super().__call__(images, **kwargs)

File D:\learning-ml\llm\tikz\.venv\Lib\site-packages\transformers\pipelines\base.py:1254, in Pipeline.__call__(self, inputs, num_workers, batch_size, *args, **kwargs)
   1246     return next(
   1247         iter(
   1248             self.get_iterator(
   (...)
   1251         )
   1252     )
   1253 else:
-> 1254     return self.run_single(inputs, preprocess_params, forward_params, postprocess_params)

File D:\learning-ml\llm\tikz\.venv\Lib\site-packages\transformers\pipelines\base.py:1261, in Pipeline.run_single(self, inputs, preprocess_params, forward_params, postprocess_params)
   1259 def run_single(self, inputs, preprocess_params, forward_params, postprocess_params):
   1260     model_inputs = self.preprocess(inputs, **preprocess_params)
-> 1261     model_outputs = self.forward(model_inputs, **forward_params)
   1262     outputs = self.postprocess(model_outputs, **postprocess_params)
   1263     return outputs

File D:\learning-ml\llm\tikz\.venv\Lib\site-packages\transformers\pipelines\base.py:1161, in Pipeline.forward(self, model_inputs, **forward_params)
   1159     with inference_context():
   1160         model_inputs = self._ensure_tensor_on_device(model_inputs, device=self.device)
-> 1161         model_outputs = self._forward(model_inputs, **forward_params)
   1162         model_outputs = self._ensure_tensor_on_device(model_outputs, device=torch.device("cpu"))
   1163 else:

File D:\learning-ml\llm\tikz\.venv\Lib\site-packages\transformers\pipelines\image_to_text.py:181, in ImageToTextPipeline._forward(self, model_inputs, **generate_kwargs)
    176 # FIXME: We need to pop here due to a difference in how `generation.py` and `generation.tf_utils.py`
    177 #  parse inputs. In the Tensorflow version, `generate` raises an error if we don't use `input_ids` whereas
    178 #  the PyTorch version matches it with `self.model.main_input_name` or `self.model.encoder.main_input_name`
    179 #  in the `_prepare_model_inputs` method.
    180 inputs = model_inputs.pop(self.model.main_input_name)
--> 181 model_outputs = self.model.generate(inputs, **model_inputs, **generate_kwargs)
    182 return model_outputs

File D:\learning-ml\llm\tikz\.venv\Lib\site-packages\torch\utils\_contextlib.py:115, in context_decorator.<locals>.decorate_context(*args, **kwargs)
    112 @functools.wraps(func)
    113 def decorate_context(*args, **kwargs):
    114     with ctx_factory():
--> 115         return func(*args, **kwargs)

File D:\learning-ml\llm\tikz\.venv\Lib\site-packages\transformers\generation\utils.py:1786, in GenerationMixin.generate(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, **kwargs)
   1783         model_kwargs["past_key_values"] = DynamicCache.from_legacy_cache(past)
   1784         use_dynamic_cache_by_default = True
-> 1786 self._validate_generated_length(generation_config, input_ids_length, has_default_max_length)
   1788 # 7. determine generation mode
   1789 generation_mode = generation_config.get_generation_mode(assistant_model)

File D:\learning-ml\llm\tikz\.venv\Lib\site-packages\transformers\generation\utils.py:1257, in GenerationMixin._validate_generated_length(self, generation_config, input_ids_length, has_default_max_length)
   1255 if input_ids_length >= generation_config.max_length:
   1256     input_ids_string = "decoder_input_ids" if self.config.is_encoder_decoder else "input_ids"
-> 1257     raise ValueError(
   1258         f"Input length of {input_ids_string} is {input_ids_length}, but `max_length` is set to"
   1259         f" {generation_config.max_length}. This can lead to unexpected behavior. You should consider"
   1260         " increasing `max_length` or, better yet, setting `max_new_tokens`."
   1261     )
   1263 # 2. Min length warnings due to unfeasible parameter combinations
   1264 min_length_error_suffix = (
   1265     " Generation will stop at the defined maximum length. You should decrease the minimum length and/or "
   1266     "increase the maximum length."
   1267 )
