Fix vocab_size — PR #56, opened by xu-song
File changed: tokenization_qwen.py (+4 −1)
@@ -237,7 +237,10 @@ class QWenTokenizer(PreTrainedTokenizer):

     @property
     def vocab_size(self):
-        [removed line — original content lost in page extraction]
+        """
+        Size of the base vocabulary (without the added tokens).
+        """
+        return len(self.mergeable_ranks)

     def _convert_id_to_token(self, index: int) -> Union[bytes, str]:
         """Converts an id to a token, special tokens included"""