zxdu20 committed on
Commit
8492687
1 Parent(s): 0d8b08d

Remove image tokens when decoding

Browse files
Files changed (1) hide show
  1. tokenization_chatglm.py +1 -0
tokenization_chatglm.py CHANGED
@@ -130,6 +130,7 @@ class SPTokenizer:
130
 
131
  def decode(self, text_ids: List[int], special_tokens=False) -> str:
132
  ids = [int(_id) - self.num_image_tokens for _id in text_ids]
 
133
  text = self._get_text_tokenizer(encode_special_tokens=special_tokens).decode(ids)
134
  text = text.replace("<n>", "\n")
135
  text = text.replace(SPTokenizer.get_tab_token(), "\t")
 
130
 
131
  def decode(self, text_ids: List[int], special_tokens=False) -> str:
132
  ids = [int(_id) - self.num_image_tokens for _id in text_ids]
133
+ ids = [_id for _id in ids if _id >= 0]
134
  text = self._get_text_tokenizer(encode_special_tokens=special_tokens).decode(ids)
135
  text = text.replace("<n>", "\n")
136
  text = text.replace(SPTokenizer.get_tab_token(), "\t")