Hugging Face code to run inference with this model
I am trying to run this model with Hugging Face Transformers on Kaggle (2x T4 GPUs), but I am getting the error below.
#################### CODE
from transformers import Qwen2_5_VLForConditionalGeneration, AutoTokenizer, AutoProcessor
from qwen_vl_utils import process_vision_info
import torch
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    "unsloth/Qwen2-VL-7B-Instruct-bnb-4bit",
    load_in_4bit=True,
    torch_dtype=torch.float16,
    device_map="auto",
)
processor = AutoProcessor.from_pretrained("unsloth/Qwen2-VL-7B-Instruct-bnb-4bit")
#################### ERROR
The load_in_4bit and load_in_8bit arguments are deprecated and will be removed in the future versions. Please, pass a BitsAndBytesConfig object in quantization_config argument instead.
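(For reference, my reading of that warning is that the non-deprecated way to request 4-bit loading is an explicit BitsAndBytesConfig; a minimal sketch of what I think it means, not verified on this checkpoint:)

from transformers import BitsAndBytesConfig
import torch

# Sketch: replace the deprecated load_in_4bit kwarg with an explicit config.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # compute dtype for the 4-bit layers
)
# ...then pass quantization_config=bnb_config to from_pretrained() instead of
# load_in_4bit=True. Note the next warning below, though: this pre-quantized
# unsloth checkpoint already ships its own quantization_config, which takes
# precedence anyway.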
You are using a model of type qwen2_vl to instantiate a model of type qwen2_5_vl. This is not supported for all configurations of models and can yield errors.
Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
/opt/conda/lib/python3.10/site-packages/transformers/quantizers/auto.py:195: UserWarning: You passed quantization_config or equivalent parameters to from_pretrained but the model you're loading already has a quantization_config attribute. The quantization_config from the model will be used.
warnings.warn(warning_msg)
(The notebook frontend also logs a seemingly unrelated Jupyter widgets error, "Failed to load model class 'HBoxModel' from module '@jupyter-widgets/controls'", a version mismatch between ^1.5.0 and 2.0.0 in Kaggle's JupyterLab; browser-console stack trace omitted.)
ValueError Traceback (most recent call last)
Cell In[1], line 11
4 import torch
6 # default: Load the model on the available device(s)
7 # model = Qwen2VLForConditionalGeneration.from_pretrained(
8 # "Qwen/Qwen2-VL-7B-Instruct", torch_dtype=torch.float16, device_map="auto"
9 # )
---> 11 model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
12 "unsloth/Qwen2-VL-7B-Instruct-bnb-4bit", load_in_4bit = True,torch_dtype=torch.float16, device_map="auto"
13 )
15 # We recommend enabling flash_attention_2 for better acceleration and memory saving, especially in multi-image and video scenarios.
16 # model = Qwen2VLForConditionalGeneration.from_pretrained(
17 # "Qwen/Qwen2-VL-7B-Instruct",
(...)
23 # default processer
24 # processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", min_pixels = 256*28*28, max_pixels = 1280*28*28)
26 processor = AutoProcessor.from_pretrained("unsloth/Qwen2-VL-7B-Instruct-bnb-4bit")
File /opt/conda/lib/python3.10/site-packages/transformers/modeling_utils.py:4270, in PreTrainedModel.from_pretrained(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, weights_only, *model_args, **kwargs)
4260 if dtype_orig is not None:
4261 torch.set_default_dtype(dtype_orig)
4263 (
4264 model,
4265 missing_keys,
4266 unexpected_keys,
4267 mismatched_keys,
4268 offload_index,
4269 error_msgs,
-> 4270 ) = cls._load_pretrained_model(
4271 model,
4272 state_dict,
4273 loaded_state_dict_keys, # XXX: rename?
4274 resolved_archive_file,
4275 pretrained_model_name_or_path,
4276 ignore_mismatched_sizes=ignore_mismatched_sizes,
4277 sharded_metadata=sharded_metadata,
4278 _fast_init=_fast_init,
4279 low_cpu_mem_usage=low_cpu_mem_usage,
4280 device_map=device_map,
4281 offload_folder=offload_folder,
4282 offload_state_dict=offload_state_dict,
4283 dtype=torch_dtype,
4284 hf_quantizer=hf_quantizer,
4285 keep_in_fp32_modules=keep_in_fp32_modules,
4286 gguf_path=gguf_path,
4287 weights_only=weights_only,
4288 )
4290 # make sure token embedding weights are still tied if needed
4291 model.tie_weights()
File /opt/conda/lib/python3.10/site-packages/transformers/modeling_utils.py:4848, in PreTrainedModel._load_pretrained_model(cls, model, state_dict, loaded_keys, resolved_archive_file, pretrained_model_name_or_path, ignore_mismatched_sizes, sharded_metadata, _fast_init, low_cpu_mem_usage, device_map, offload_folder, offload_state_dict, dtype, hf_quantizer, keep_in_fp32_modules, gguf_path, weights_only)
4846 else:
4847 fixed_state_dict = cls._fix_state_dict_keys_on_load(state_dict)
-> 4848 new_error_msgs, offload_index, state_dict_index = _load_state_dict_into_meta_model(
4849 model_to_load,
4850 fixed_state_dict,
4851 start_prefix,
4852 expected_keys,
4853 device_map=device_map,
4854 offload_folder=offload_folder,
4855 offload_index=offload_index,
4856 state_dict_folder=state_dict_folder,
4857 state_dict_index=state_dict_index,
4858 dtype=dtype,
4859 hf_quantizer=hf_quantizer,
4860 is_safetensors=is_safetensors,
4861 keep_in_fp32_modules=keep_in_fp32_modules,
4862 unexpected_keys=unexpected_keys,
4863 )
4864 error_msgs += new_error_msgs
4865 else:
4866 # Sharded checkpoint or whole but low_cpu_mem_usage==True
File /opt/conda/lib/python3.10/site-packages/transformers/modeling_utils.py:876, in _load_state_dict_into_meta_model(model, state_dict, start_prefix, expected_keys, device_map, offload_folder, offload_index, state_dict_folder, state_dict_index, dtype, hf_quantizer, is_safetensors, keep_in_fp32_modules, unexpected_keys, pretrained_model_name_or_path)
873 param_device = "cpu" if is_local_dist_rank_0() else "meta"
875 # For backward compatibility with older versions of accelerate and for non-quantized params
--> 876 set_module_tensor_to_device(model, param_name, param_device, **set_module_kwargs)
877 else:
878 hf_quantizer.create_quantized_param(model, param, param_name, param_device, state_dict, unexpected_keys)
File /opt/conda/lib/python3.10/site-packages/accelerate/utils/modeling.py:373, in set_module_tensor_to_device(module, tensor_name, device, value, dtype, fp16_statistics, tied_params_map)
369 if value is not None:
370 # We can expect mismatches when using bnb 4bit since Params4bit will reshape and pack the weights.
371 # In other cases, we want to make sure we're not loading checkpoints that do not match the config.
372 if old_value.shape != value.shape and param_cls.name != "Params4bit":
--> 373 raise ValueError(
374 f'Trying to set a tensor of shape {value.shape} in "{tensor_name}" (which has shape {old_value.shape}), this looks incorrect.'
375 )
377 if dtype is None:
378 # For compatibility with PyTorch load_state_dict which converts state dict dtype to existing dtype in model
379 value = value.to(old_value.dtype)
ValueError: Trying to set a tensor of shape torch.Size([1280]) in "weight" (which has shape torch.Size([3584])), this looks incorrect.
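Putting the warning at the top ("model of type qwen2_vl to instantiate a model of type qwen2_5_vl") together with this shape mismatch, I suspect the class and the checkpoint don't match: unsloth/Qwen2-VL-7B-Instruct-bnb-4bit is a Qwen2-VL checkpoint, so presumably it wants the Qwen2-VL class. A sketch of what I would try instead (untested):

from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
import torch

# Sketch: load the Qwen2-VL checkpoint with the matching Qwen2-VL class.
# The checkpoint is already quantized to 4-bit, so no load_in_4bit kwarg
# should be needed.
model = Qwen2VLForConditionalGeneration.from_pretrained(
    "unsloth/Qwen2-VL-7B-Instruct-bnb-4bit",
    torch_dtype=torch.float16,
    device_map="auto",
)
processor = AutoProcessor.from_pretrained("unsloth/Qwen2-VL-7B-Instruct-bnb-4bit")

Conversely, Qwen2_5_VLForConditionalGeneration would presumably need a Qwen2.5-VL checkpoint, such as the unsloth/Qwen2.5-VL-7B-Instruct-unsloth-bnb-4bit one from the Colab notebook below.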
Also, when running Qwen2.5 VL Instruct in the shared Colab notebook, I get this error:
🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
RuntimeError Traceback (most recent call last)
in <cell line: 0>()
20 ] # More models at https://huggingface.co/unsloth
21
---> 22 model, tokenizer = FastVisionModel.from_pretrained(
23 "unsloth/Qwen2.5-VL-7B-Instruct-unsloth-bnb-4bit",
24 load_in_4bit = True, # Use 4bit to reduce memory use. False for 16bit LoRA.
/usr/local/lib/python3.11/dist-packages/unsloth/models/loader.py in from_pretrained(model_name, max_seq_length, dtype, load_in_4bit, token, device_map, rope_scaling, fix_tokenizer, trust_remote_code, use_gradient_checkpointing, resize_model_vocab, revision, return_logits, fullgraph, use_exact_model_name, *args, **kwargs)
446 f"to obtain the latest transformers build, then restart this session."
447 )
--> 448 raise RuntimeError(autoconfig_error or peft_error)
449 pass
450
RuntimeError: The checkpoint you are trying to load has model type qwen2_5_vl but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date.
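The error text itself (and the truncated Unsloth message above it, "to obtain the latest transformers build, then restart this session") points at an outdated transformers install. My understanding, which I haven't confirmed on this exact Colab image, is that qwen2_5_vl support only landed around transformers 4.49, so upgrading in a notebook cell and restarting the runtime may be all that's needed:

# Run in a Colab/Kaggle cell, then restart the session before re-importing.
# Assumes Qwen2.5-VL support landed in transformers 4.49 (my understanding).
!pip install --upgrade transformers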