How to load in 4-bit?

#3
by nudelbrot - opened

in :12 │
│ │
│ 9 │
│ 10 #config.max_seq_len = 4096 # (input + output) tokens can now be up to 4096 │
│ 11 │
│ ❱ 12 model = transformers.AutoModelForCausalLM.from_pretrained( │
│ 13 name, │
│ 14 device_map="auto", load_in_4bit=True, low_cpu_mem_usage=True, │
│ 15 #config=config, │
│ │
│ /work/jupyter/transformers/src/transformers/models/auto/auto_factory.py:479 in from_pretrained │
│ │
│ 476 │ │ │ │ class_ref, pretrained_model_name_or_path, **hub_kwargs, **kwargs │
│ 477 │ │ │ ) │
│ 478 │ │ │ _ = hub_kwargs.pop("code_revision", None) │
│ ❱ 479 │ │ │ return model_class.from_pretrained( │
│ 480 │ │ │ │ pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, │
│ 481 │ │ │ ) │
│ 482 │ │ elif type(config) in cls._model_mapping.keys(): │
│ │
│ /work/jupyter/transformers/src/transformers/modeling_utils.py:2600 in from_pretrained │
│ │
│ 2597 │ │ if from_pt: │
│ 2598 │ │ │ if not is_sharded and state_dict is None: │
│ 2599 │ │ │ │ # Time to load the checkpoint │
│ ❱ 2600 │ │ │ │ state_dict = load_state_dict(resolved_archive_file) │
│ 2601 │ │ │ │
│ 2602 │ │ │ # set dtype to instantiate the model under: │
│ 2603 │ │ │ # 1. If torch_dtype is not None, we use that dtype │
│ │
│ /work/jupyter/transformers/src/transformers/modeling_utils.py:442 in load_state_dict │
│ │
│ 439 │ │ # Check format of the archive │
│ 440 │ │ with safe_open(checkpoint_file, framework="pt") as f: │
│ 441 │ │ │ metadata = f.metadata() │
│ ❱ 442 │ │ if metadata.get("format") not in ["pt", "tf", "flax"]: │
│ 443 │ │ │ raise OSError( │
│ 444 │ │ │ │ f"The safetensors archive passed at {checkpoint_file} does not contain t │
│ 445 │ │ │ │ "you save your model with the save_pretrained method." │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
AttributeError: 'NoneType' object has no attribute 'get'

Sign up or log in to comment