Update tokenizer to fix bug: encode str entries to UTF-8 bytes before the b''.join that builds the tagged value

#3
by Jinze - opened
Files changed (1) hide show
  1. tokenization_qwen.py +2 -1
tokenization_qwen.py CHANGED
@@ -353,7 +353,8 @@ class QWenTokenizer(PreTrainedTokenizer):
353
  else:
354
  _tobytes = lambda x: x.encode('utf-8') if isinstance(x, str) else x
355
  return [{'text': b''.join(map(_tobytes, map(self.decoder.get, tokens))).decode('utf-8')}]
356
- val = b''.join(map(self.decoder.get, tokens[1:-1])).decode('utf-8')
 
357
  return [{key: val}]
358
 
359
  return _replace_closed_tag(
 
353
  else:
354
  _tobytes = lambda x: x.encode('utf-8') if isinstance(x, str) else x
355
  return [{'text': b''.join(map(_tobytes, map(self.decoder.get, tokens))).decode('utf-8')}]
356
+ _tobytes = lambda x: x.encode('utf-8') if isinstance(x, str) else x
357
+ val = b''.join(map(_tobytes, map(self.decoder.get, tokens[1:-1]))).decode('utf-8')
358
  return [{key: val}]
359
 
360
  return _replace_closed_tag(