Unpickling Error when loading model with lightning transformers

#19
by charles0120 - opened

I use lightning-transformers to perform model parallelism for inference. alpaca-13b can be loaded successfully, but alpaca-native throws errors; the complete error message is given below. I also tried loading alpaca-native directly with HuggingFace Transformers, and that works well.

I am new to the HuggingFace community, so please let me know if I should post this issue somewhere else. Thank you!


UnpicklingError Traceback (most recent call last)
/data2/cxsun/anaconda3/envs/llm_gpu_clean/lib/python3.7/site-packages/transformers/modeling_utils.py in load_state_dict(checkpoint_file)
441 try:
--> 442 return torch.load(checkpoint_file, map_location="cpu")
443 except Exception as e:

/data2/cxsun/anaconda3/envs/llm_gpu_clean/lib/python3.7/site-packages/torch/serialization.py in load(f, map_location, pickle_module, weights_only, **pickle_load_args)
794 raise pickle.UnpicklingError(UNSAFE_MESSAGE + str(e)) from None
--> 795 return _legacy_load(opened_file, map_location, pickle_module, **pickle_load_args)
796

/data2/cxsun/anaconda3/envs/llm_gpu_clean/lib/python3.7/site-packages/torch/serialization.py in _legacy_load(f, map_location, pickle_module, **pickle_load_args)
1001
-> 1002 magic_number = pickle_module.load(f, **pickle_load_args)
1003 if magic_number != MAGIC_NUMBER:

UnpicklingError: invalid load key, '{'.

The above exception was the direct cause of the following exception:

ValueError Traceback (most recent call last)
/data2/cxsun/anaconda3/envs/llm_gpu_clean/lib/python3.7/site-packages/transformers/modeling_utils.py in load_state_dict(checkpoint_file)
455 "model. Make sure you have saved the model properly."
--> 456 ) from e
457 except (UnicodeDecodeError, ValueError):

ValueError: Unable to locate the file models/models--chavinlo--alpaca-native/snapshots/cc7773cac2478231807c56ef2f02292d98f85cf5/config.json which is necessary to load this pretrained model. Make sure you have saved the model properly.

During handling of the above exception, another exception occurred:

OSError Traceback (most recent call last)
/tmp/ipykernel_54854/208673554.py in
8 tokenizer=AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir),
9 low_cpu_mem_usage=True,
---> 10 device_map="auto",
11 )

/tmp/ipykernel_54854/614374815.py in init(self, non_padding_token_as_last, downstream_model_type, *args, **kwargs)
11 self, non_padding_token_as_last: bool = False, downstream_model_type: Type[_BaseAutoModelClass] = transformers.AutoModelForCausalLM, *args, **kwargs
12 ) -> None:
---> 13 super().init(downstream_model_type, *args, **kwargs)
14
15 self.set_new_experiment(non_padding_token_as_last)

/data2/cxsun/anaconda3/envs/llm_gpu_clean/lib/python3.7/site-packages/lightning_transformers/core/model.py in init(self, downstream_model_type, pretrained_model_name_or_path, tokenizer, pipeline_kwargs, load_weights, deepspeed_sharding, **model_data_kwargs)
62 self.pretrained_model_name_or_path = pretrained_model_name_or_path
63 if not self.deepspeed_sharding:
---> 64 self.initialize_model(self.pretrained_model_name_or_path)
65 self._tokenizer = tokenizer # necessary for hf_pipeline
66 self._hf_pipeline = None

/data2/cxsun/anaconda3/envs/llm_gpu_clean/lib/python3.7/site-packages/lightning_transformers/core/model.py in initialize_model(self, pretrained_model_name_or_path)
74 if self.load_weights:
75 self.model = self.downstream_model_type.from_pretrained(
---> 76 pretrained_model_name_or_path, **self.model_data_kwargs
77 )
78 else:

/data2/cxsun/anaconda3/envs/llm_gpu_clean/lib/python3.7/site-packages/transformers/models/auto/auto_factory.py in from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
470 model_class = _get_model_class(config, cls._model_mapping)
471 return model_class.from_pretrained(
--> 472 pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
473 )
474 raise ValueError(

/data2/cxsun/anaconda3/envs/llm_gpu_clean/lib/python3.7/site-packages/transformers/modeling_utils.py in from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
2558 if not is_sharded and state_dict is None:
2559 # Time to load the checkpoint
-> 2560 state_dict = load_state_dict(resolved_archive_file)
2561
2562 # set dtype to instantiate the model under:

/data2/cxsun/anaconda3/envs/llm_gpu_clean/lib/python3.7/site-packages/transformers/modeling_utils.py in load_state_dict(checkpoint_file)
457 except (UnicodeDecodeError, ValueError):
458 raise OSError(
--> 459 f"Unable to load weights from pytorch checkpoint file for '{checkpoint_file}' "
460 f"at '{checkpoint_file}'. "
461 "If you tried to load a PyTorch model from a TF 2.0 checkpoint, please set from_tf=True."

OSError: Unable to load weights from pytorch checkpoint file for 'models/models--chavinlo--alpaca-native/snapshots/cc7773cac2478231807c56ef2f02292d98f85cf5/config.json' at 'models/models--chavinlo--alpaca-native/snapshots/cc7773cac2478231807c56ef2f02292d98f85cf5/config.json'. If you tried to load a PyTorch model from a TF 2.0 checkpoint, please set from_tf=True.

Sign up or log in to comment