Update tokenizer to fix bug
#3 by Jinze - opened
- tokenization_qwen.py +2 -1
tokenization_qwen.py
CHANGED
@@ -353,7 +353,8 @@ class QWenTokenizer(PreTrainedTokenizer):
|
|
353 |
else:
|
354 |
_tobytes = lambda x: x.encode('utf-8') if isinstance(x, str) else x
|
355 |
return [{'text': b''.join(map(_tobytes, map(self.decoder.get, tokens))).decode('utf-8')}]
|
356 |
-
|
|
|
357 |
return [{key: val}]
|
358 |
|
359 |
return _replace_closed_tag(
|
|
|
353 |
else:
|
354 |
_tobytes = lambda x: x.encode('utf-8') if isinstance(x, str) else x
|
355 |
return [{'text': b''.join(map(_tobytes, map(self.decoder.get, tokens))).decode('utf-8')}]
|
356 |
+
_tobytes = lambda x: x.encode('utf-8') if isinstance(x, str) else x
|
357 |
+
val = b''.join(map(_tobytes, map(self.decoder.get, tokens[1:-1]))).decode('utf-8')
|
358 |
return [{key: val}]
|
359 |
|
360 |
return _replace_closed_tag(
|