tianxie-sf committed on
Commit
a473726
1 Parent(s): d61d475

Update tokenization_xgen.py

Browse files
Files changed (1) hide show
  1. tokenization_xgen.py +2 -2
tokenization_xgen.py CHANGED
@@ -134,6 +134,8 @@ class XgenTokenizer(PreTrainedTokenizer):
134
  ):
135
  pad_token_added = AddedToken(pad_token, lstrip=False, rstrip=False) if isinstance(pad_token, str) else pad_token
136
  eos_token_added = AddedToken(eos_token, lstrip=False, rstrip=False) if isinstance(eos_token, str) else eos_token
 
 
137
  super().__init__(
138
  pad_token=pad_token_added,
139
  eos_token=eos_token_added,
@@ -141,8 +143,6 @@ class XgenTokenizer(PreTrainedTokenizer):
141
  add_special_tokens=add_special_tokens,
142
  **kwargs,
143
  )
144
- self.add_eos_token = add_eos_token
145
- self.encoder = tiktoken_tokenizer(base="gpt2", pad_token=pad_token, add_special=add_special_tokens)
146
 
147
  @property
148
  def vocab_size(self):
 
134
  ):
135
  pad_token_added = AddedToken(pad_token, lstrip=False, rstrip=False) if isinstance(pad_token, str) else pad_token
136
  eos_token_added = AddedToken(eos_token, lstrip=False, rstrip=False) if isinstance(eos_token, str) else eos_token
137
+ self.add_eos_token = add_eos_token
138
+ self.encoder = tiktoken_tokenizer(base="gpt2", pad_token=pad_token, add_special=add_special_tokens)
139
  super().__init__(
140
  pad_token=pad_token_added,
141
  eos_token=eos_token_added,
 
143
  add_special_tokens=add_special_tokens,
144
  **kwargs,
145
  )
 
 
146
 
147
  @property
148
  def vocab_size(self):