tianxie-sf committed on
Commit
68f77de
1 Parent(s): 5fe0f1b

fix issue in get vocab (#14)

Browse files

- fix issue in get vocab (ac8a04ffa83d403954e4bf9e88ee811a805b5a5d)

Files changed (1) hide show
  1. tokenization_xgen.py +1 -1
tokenization_xgen.py CHANGED
@@ -139,7 +139,7 @@ class XgenTokenizer(PreTrainedTokenizer):
139
 
140
  def get_vocab(self):
141
  """Returns vocab as a dict"""
142
- vocab = {self._convert_id_to_token(i): i for i in range(self.vocab_size)}
143
  return vocab
144
 
145
  def _tokenize(self, text, **kwargs):
 
139
 
140
  def get_vocab(self):
141
  """Returns vocab as a dict"""
142
+ vocab = {self.encoder.decode_single_token_bytes(i): i for i in range(self.vocab_size)}
143
  return vocab
144
 
145
  def _tokenize(self, text, **kwargs):