GradientGuru committed on
Commit
5e81b14
1 Parent(s): e837cb3

Update tokenization_baichuan.py

Browse files
Files changed (1) hide show
  1. tokenization_baichuan.py +2 -0
tokenization_baichuan.py CHANGED
@@ -72,11 +72,13 @@ class BaichuanTokenizer(PreTrainedTokenizer):
72
  eos_token = AddedToken(eos_token, lstrip=False, rstrip=False) if isinstance(eos_token, str) else eos_token
73
  unk_token = AddedToken(unk_token, lstrip=False, rstrip=False) if isinstance(unk_token, str) else unk_token
74
  pad_token = AddedToken(pad_token, lstrip=False, rstrip=False) if isinstance(pad_token, str) else pad_token
 
75
  self.vocab_file = vocab_file
76
  self.add_bos_token = add_bos_token
77
  self.add_eos_token = add_eos_token
78
  self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
79
  self.sp_model.Load(vocab_file)
 
80
  super().__init__(
81
  bos_token=bos_token,
82
  eos_token=eos_token,
 
72
  eos_token = AddedToken(eos_token, lstrip=False, rstrip=False) if isinstance(eos_token, str) else eos_token
73
  unk_token = AddedToken(unk_token, lstrip=False, rstrip=False) if isinstance(unk_token, str) else unk_token
74
  pad_token = AddedToken(pad_token, lstrip=False, rstrip=False) if isinstance(pad_token, str) else pad_token
75
+
76
  self.vocab_file = vocab_file
77
  self.add_bos_token = add_bos_token
78
  self.add_eos_token = add_eos_token
79
  self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
80
  self.sp_model.Load(vocab_file)
81
+
82
  super().__init__(
83
  bos_token=bos_token,
84
  eos_token=eos_token,