tianxie-sf committed on
Commit
943f44c
1 Parent(s): 2aa9556

Update tokenization_xgen.py (#16)

Browse files

- Update tokenization_xgen.py (f79aeb3407323d2df16be52abe61dbed426d58b9)

Files changed (1) hide show
  1. tokenization_xgen.py +3 -3
tokenization_xgen.py CHANGED
@@ -134,15 +134,15 @@ class XgenTokenizer(PreTrainedTokenizer):
134
  ):
135
  pad_token_added = AddedToken(pad_token, lstrip=False, rstrip=False) if isinstance(pad_token, str) else pad_token
136
  eos_token_added = AddedToken(eos_token, lstrip=False, rstrip=False) if isinstance(eos_token, str) else eos_token
 
 
137
  super().__init__(
138
  pad_token=pad_token_added,
139
  eos_token=eos_token_added,
140
  add_eos_token=add_eos_token,
141
  add_special_tokens=add_special_tokens,
142
  **kwargs,
143
- )
144
- self.add_eos_token = add_eos_token
145
- self.encoder = tiktoken_tokenizer(base="gpt2", pad_token=pad_token, add_special=add_special_tokens)
146
 
147
  @property
148
  def vocab_size(self):
 
134
  ):
135
  pad_token_added = AddedToken(pad_token, lstrip=False, rstrip=False) if isinstance(pad_token, str) else pad_token
136
  eos_token_added = AddedToken(eos_token, lstrip=False, rstrip=False) if isinstance(eos_token, str) else eos_token
137
+ self.add_eos_token = add_eos_token
138
+ self.encoder = tiktoken_tokenizer(base="gpt2", pad_token=pad_token, add_special=add_special_tokens)
139
  super().__init__(
140
  pad_token=pad_token_added,
141
  eos_token=eos_token_added,
142
  add_eos_token=add_eos_token,
143
  add_special_tokens=add_special_tokens,
144
  **kwargs,
145
+ )
 
 
146
 
147
  @property
148
  def vocab_size(self):