GradientGuru committed on
Commit
ed70bed
1 Parent(s): c0a7afe

Update tokenization_baichuan.py

Browse files
Files changed (1) hide show
  1. tokenization_baichuan.py +7 -5
tokenization_baichuan.py CHANGED
@@ -68,6 +68,13 @@ class BaichuanTokenizer(PreTrainedTokenizer):
68
  if isinstance(pad_token, str)
69
  else pad_token
70
  )
 
 
 
 
 
 
 
71
  super().__init__(
72
  bos_token=bos_token,
73
  eos_token=eos_token,
@@ -79,11 +86,6 @@ class BaichuanTokenizer(PreTrainedTokenizer):
79
  clean_up_tokenization_spaces=clean_up_tokenization_spaces,
80
  **kwargs,
81
  )
82
- self.vocab_file = vocab_file
83
- self.add_bos_token = add_bos_token
84
- self.add_eos_token = add_eos_token
85
- self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
86
- self.sp_model.Load(vocab_file)
87
 
88
  def __getstate__(self):
89
  state = self.__dict__.copy()
 
68
  if isinstance(pad_token, str)
69
  else pad_token
70
  )
71
+
72
+ self.vocab_file = vocab_file
73
+ self.add_bos_token = add_bos_token
74
+ self.add_eos_token = add_eos_token
75
+ self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
76
+ self.sp_model.Load(vocab_file)
77
+
78
  super().__init__(
79
  bos_token=bos_token,
80
  eos_token=eos_token,
 
86
  clean_up_tokenization_spaces=clean_up_tokenization_spaces,
87
  **kwargs,
88
  )
 
 
 
 
 
89
 
90
  def __getstate__(self):
91
  state = self.__dict__.copy()