File size: 965 Bytes
614012d 9495a4f f4973d4 9495a4f 614012d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
"""
gpt_35_turbo decode UnicodeDecodeError 99413 b' \xe6\xb5'
gpt_35_turbo decode UnicodeDecodeError 99742 b'\x8c\xa8'
gpt_35_turbo decode UnicodeDecodeError 99834 b'\xad\x90'
gpt_35_turbo decode UnicodeDecodeError 100112 b'\xe0\xae\xbf\xe0\xae'
gpt_35_turbo decode KeyError 100256
gpt_35_turbo decode KeyError 100261
gpt_35_turbo decode KeyError 100262
gpt_35_turbo decode KeyError 100263
"""
import json
import tiktoken
# Build the BPE encoding used by gpt-3.5-turbo (cl100k_base).
tokenizer = tiktoken.encoding_for_model('gpt-3.5-turbo')
# Two known-problematic ids (see module docstring): 100263 is an unallocated
# id that triggers KeyError; 99834 maps to an invalid UTF-8 byte fragment.
tokens = [100263, 99834]
# NOTE(review): per the failure log in the module docstring these decode
# calls are expected to raise — this script is an error-reproduction scratch.
tokenizer.decode(tokens)
tokenizer.decode(tokens)
# Bypass the public API: decode straight to bytes via the private Rust core,
# then decode as UTF-8 with replacement characters for invalid sequences.
tokenizer._core_bpe.decode_bytes(tokens).decode("utf-8", errors="replace")
# Probe how each decode entry point fails on problematic ids:
# 100263 is unallocated (the cl100k special tokens occupy 100257-100260
# and 100276), and 99834 decodes to an invalid UTF-8 fragment.
# Failures are deliberately ignored — this is a best-effort probe — but we
# catch Exception rather than using a bare `except:`, which would also
# swallow KeyboardInterrupt and SystemExit.
for token_id in [100263, 99834]:  # special_tokens: 100257-100260 100276
    try:
        tokenizer.decode_tokens_bytes([token_id])
    except Exception:
        pass
    try:
        tokenizer.decode_single_token_bytes(token_id)
    except Exception:
        pass
    try:
        tokenizer.decode_bytes([token_id])
    except Exception:
        pass
|