if001 committed on
Commit
e47ab47
1 Parent(s): 88b31c9

fix convert token to id

Browse files
Files changed (1) hide show
  1. sentencepiece_ja.py +3 -0
sentencepiece_ja.py CHANGED
@@ -41,6 +41,9 @@ class SentencePieceJA(PreTrainedTokenizer):
41
  return self._tokenizer.encode(text).tokens
42
 
43
  def _convert_token_to_id(self, token):
 
 
 
44
  return self._tokenizer.encode(token).ids[0]
45
 
46
  def _convert_id_to_token(self, index: int) -> str:
 
41
  return self._tokenizer.encode(text).tokens
42
 
43
  def _convert_token_to_id(self, token):
44
+ ids = self._tokenizer.encode(token).ids
45
+ if len(ids) == 0:
46
+ return self.unk_token_id
47
  return self._tokenizer.encode(token).ids[0]
48
 
49
  def _convert_id_to_token(self, index: int) -> str: