tianxie-sf committed on
Commit
49019dd
1 Parent(s): 9936980

fix issue in get vocab (#9)

Browse files

- fix issue in get vocab (09afc931c1946c3bc23d1c68f9527f2ea639b99f)

Files changed (1) hide show
  1. tokenization_xgen.py +1 -1
tokenization_xgen.py CHANGED
@@ -139,7 +139,7 @@ class XgenTokenizer(PreTrainedTokenizer):
139
 
140
  def get_vocab(self):
141
  """Returns vocab as a dict"""
142
- vocab = {self._convert_id_to_token(i): i for i in range(self.vocab_size)}
143
  return vocab
144
 
145
  def _tokenize(self, text, **kwargs):
 
139
 
140
  def get_vocab(self):
141
  """Returns vocab as a dict"""
142
+ vocab = {self.encoder.decode_single_token_bytes(i): i for i in range(self.vocab_size)}
143
  return vocab
144
 
145
  def _tokenize(self, text, **kwargs):