eson committed on
Commit
aa0c637
1 Parent(s): da93e39
vocab/moss/moss-moon-003-sft/tokenization_moss.py CHANGED
@@ -146,6 +146,11 @@ class MossTokenizer(PreTrainedTokenizer):
146
  eos_token = AddedToken(eos_token, lstrip=False, rstrip=False) if isinstance(eos_token, str) else eos_token
147
  unk_token = AddedToken(unk_token, lstrip=False, rstrip=False) if isinstance(unk_token, str) else unk_token
148
  pad_token = AddedToken(pad_token, lstrip=False, rstrip=False) if isinstance(pad_token, str) else pad_token
 
 
 
 
 
149
  super().__init__(
150
  errors=errors,
151
  unk_token=unk_token,
@@ -156,10 +161,7 @@ class MossTokenizer(PreTrainedTokenizer):
156
  add_bos_token=add_bos_token,
157
  **kwargs,
158
  )
159
- self.add_bos_token = add_bos_token
160
 
161
- with open(vocab_file, encoding="utf-8") as vocab_handle:
162
- self.encoder = json.load(vocab_handle)
163
  self.decoder = {v: k for k, v in self.encoder.items()}
164
  self.errors = errors # how to handle errors in decoding
165
  self.byte_encoder = bytes_to_unicode()
 
146
  eos_token = AddedToken(eos_token, lstrip=False, rstrip=False) if isinstance(eos_token, str) else eos_token
147
  unk_token = AddedToken(unk_token, lstrip=False, rstrip=False) if isinstance(unk_token, str) else unk_token
148
  pad_token = AddedToken(pad_token, lstrip=False, rstrip=False) if isinstance(pad_token, str) else pad_token
149
+ self.add_bos_token = add_bos_token
150
+
151
+ with open(vocab_file, encoding="utf-8") as vocab_handle:
152
+ self.encoder = json.load(vocab_handle)
153
+
154
  super().__init__(
155
  errors=errors,
156
  unk_token=unk_token,
 
161
  add_bos_token=add_bos_token,
162
  **kwargs,
163
  )
 
164
 
 
 
165
  self.decoder = {v: k for k, v in self.encoder.items()}
166
  self.errors = errors # how to handle errors in decoding
167
  self.byte_encoder = bytes_to_unicode()