xu-song's picture
fix unicode error: 'unicodeescape' codec can't decode bytes in position 602-608: unknown Unicode character name
bce41d0
raw
history blame
384 Bytes
"""
## 默认 use_fast=True 报错
lib\site-packages\transformers\tokenization_utils_fast.py", line 504, in _batch_encode_plus
encodings = self._tokenizer.encode_batch(
pyo3_runtime.PanicException: AddedVocabulary bad split
"""
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("lmsys/fastchat-t5-3b-v1.0", trust_remote_code=True, use_fast=False)