tianxie-sf
committed on
Commit
·
68f77de
1
Parent(s):
5fe0f1b
fix issue in get vocab (#14)
Browse files
- fix issue in get vocab (ac8a04ffa83d403954e4bf9e88ee811a805b5a5d)
- tokenization_xgen.py +1 -1
tokenization_xgen.py
CHANGED
@@ -139,7 +139,7 @@ class XgenTokenizer(PreTrainedTokenizer):
|
|
139 |
|
140 |
def get_vocab(self):
|
141 |
"""Returns vocab as a dict"""
|
142 |
-
vocab = {self.
|
143 |
return vocab
|
144 |
|
145 |
def _tokenize(self, text, **kwargs):
|
|
|
139 |
|
140 |
def get_vocab(self):
|
141 |
"""Returns vocab as a dict"""
|
142 |
+
vocab = {self.encoder.decode_single_token_bytes(i): i for i in range(self.vocab_size)}
|
143 |
return vocab
|
144 |
|
145 |
def _tokenize(self, text, **kwargs):
|