r"""Probe how tiktoken's ``gpt-3.5-turbo`` encoding decodes troublesome token ids.

Previously observed decode failures. Each line appears to read: tokenizer
name, operation, exception type, token id, and (for UnicodeDecodeError) the
raw bytes of the token:

    gpt_35_turbo decode UnicodeDecodeError 99413 b' \xe6\xb5'
    gpt_35_turbo decode UnicodeDecodeError 99742 b'\x8c\xa8'
    gpt_35_turbo decode UnicodeDecodeError 99834 b'\xad\x90'
    gpt_35_turbo decode UnicodeDecodeError 100112 b'\xe0\xae\xbf\xe0\xae'
    gpt_35_turbo decode KeyError 100256
    gpt_35_turbo decode KeyError 100261
    gpt_35_turbo decode KeyError 100262
    gpt_35_turbo decode KeyError 100263

The docstring is a raw string so the ``\x..`` byte escapes above stay as
literal text instead of being interpreted as characters.
"""

import json
import tiktoken

tokenizer = tiktoken.encoding_for_model('gpt-3.5-turbo')

# One id logged above with KeyError (100263) and one logged with
# UnicodeDecodeError (99834).
tokens = [100263, 99834]


def _probe(label, func, *args):
    """Call ``func(*args)`` and print either its result or the exception raised.

    Failures are reported rather than propagated so that one failing call does
    not stop the remaining probes from running.

    NOTE(review): the original handlers were bare ``except:``. This catches
    ``Exception`` only, so Ctrl-C and ``SystemExit`` still work. If the
    installed tiktoken reports failures through a ``BaseException`` subclass,
    this needs widening -- confirm against the installed version.
    """
    try:
        result = func(*args)
    except Exception as exc:
        print(f"{label}: {type(exc).__name__}: {exc}")
    else:
        print(f"{label}: {result!r}")


# Decode the whole list at once: via the public API, then via the underlying
# byte decoder followed by an explicit lossy UTF-8 decode.
_probe("decode", tokenizer.decode, tokens)
_probe(
    "_core_bpe.decode_bytes + utf-8 (errors='replace')",
    lambda: tokenizer._core_bpe.decode_bytes(tokens).decode("utf-8", errors="replace"),
)

# Decode each id on its own through the three byte-level entry points.
# special_tokens: 100257-100260 100276
# (the original comment read "200257", presumably a typo -- confirm)
for token_id in tokens:
    _probe(f"decode_tokens_bytes([{token_id}])", tokenizer.decode_tokens_bytes, [token_id])
    _probe(f"decode_single_token_bytes({token_id})", tokenizer.decode_single_token_bytes, token_id)
    _probe(f"decode_bytes([{token_id}])", tokenizer.decode_bytes, [token_id])