JoeyHeisenberg committed on
Commit
ccd3e9d
1 Parent(s): ac80538

Fix the init problem with the latest transformers: set up the SentencePiece model before calling super().__init__()

Browse files
Files changed (1) hide show
  1. tokenization_bluelm.py +6 -5
tokenization_bluelm.py CHANGED
@@ -73,6 +73,12 @@ class BlueLMTokenizer(PreTrainedTokenizer):
73
  eos_token = AddedToken(eos_token, lstrip=False, rstrip=False) if isinstance(eos_token, str) else eos_token
74
  unk_token = AddedToken(unk_token, lstrip=False, rstrip=False) if isinstance(unk_token, str) else unk_token
75
  pad_token = AddedToken(pad_token, lstrip=False, rstrip=False) if isinstance(pad_token, str) else pad_token
 
 
 
 
 
 
76
  super().__init__(
77
  bos_token=bos_token,
78
  eos_token=eos_token,
@@ -84,11 +90,6 @@ class BlueLMTokenizer(PreTrainedTokenizer):
84
  clean_up_tokenization_spaces=clean_up_tokenization_spaces,
85
  **kwargs,
86
  )
87
- self.vocab_file = vocab_file
88
- self.add_bos_token = add_bos_token
89
- self.add_eos_token = add_eos_token
90
- self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
91
- self.sp_model.Load(vocab_file)
92
 
93
  def __getstate__(self):
94
  state = self.__dict__.copy()
 
73
  eos_token = AddedToken(eos_token, lstrip=False, rstrip=False) if isinstance(eos_token, str) else eos_token
74
  unk_token = AddedToken(unk_token, lstrip=False, rstrip=False) if isinstance(unk_token, str) else unk_token
75
  pad_token = AddedToken(pad_token, lstrip=False, rstrip=False) if isinstance(pad_token, str) else pad_token
76
+
77
+ self.vocab_file = vocab_file
78
+ self.add_bos_token = add_bos_token
79
+ self.add_eos_token = add_eos_token
80
+ self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
81
+ self.sp_model.Load(vocab_file)
82
  super().__init__(
83
  bos_token=bos_token,
84
  eos_token=eos_token,
 
90
  clean_up_tokenization_spaces=clean_up_tokenization_spaces,
91
  **kwargs,
92
  )
 
 
 
 
 
93
 
94
  def __getstate__(self):
95
  state = self.__dict__.copy()