eson committed on
Commit
5c28f98
1 Parent(s): 93a65d3

Fix vocab_size

Browse files
Files changed (1) hide show
  1. tokenization_qwen.py +4 -1
tokenization_qwen.py CHANGED
@@ -237,7 +237,10 @@ class QWenTokenizer(PreTrainedTokenizer):
237
 
238
  @property
239
  def vocab_size(self):
240
- return self.tokenizer.n_vocab
 
 
 
241
 
242
  def _convert_id_to_token(self, index: int) -> Union[bytes, str]:
243
  """Converts an id to a token, special tokens included"""
 
237
 
238
  @property
239
  def vocab_size(self):
240
+ """
241
+ Size of the base vocabulary (without the added tokens).
242
+ """
243
+ return len(self.mergeable_ranks)
244
 
245
  def _convert_id_to_token(self, index: int) -> Union[bytes, str]:
246
  """Converts an id to a token, special tokens included"""