tianxie-sf committed on
Commit
1a0f468
1 Parent(s): 3987e09

fix issue in get vocab (#18)

Browse files

- fix issue in get vocab (c2f5b283e79e4d6e9125d8fd1c1a170b17415b7a)

Files changed (1) hide show
  1. tokenization_xgen.py +1 -1
tokenization_xgen.py CHANGED
@@ -139,7 +139,7 @@ class XgenTokenizer(PreTrainedTokenizer):
139
 
140
  def get_vocab(self):
141
  """Returns vocab as a dict"""
142
- vocab = {self._convert_id_to_token(i): i for i in range(self.vocab_size)}
143
  return vocab
144
 
145
  def _tokenize(self, text, **kwargs):
 
139
 
140
  def get_vocab(self):
141
  """Returns vocab as a dict"""
142
+ vocab = {self.encoder.decode_single_token_bytes(i): i for i in range(self.vocab_size)}
143
  return vocab
144
 
145
  def _tokenize(self, text, **kwargs):