from vocab.gpt_35_turbo import tokenizer
text = "你好,请告诉我聚乙烯是什么"  # "Hello, please tell me what polyethylene is"
encoding = tokenizer.encode(text)
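print(encoding)                    # token ids for the sample text
print(tokenizer.decode(encoding))  # round-trip the ids back to the original string

# Probe individual token ids: decode each to text and show its raw token string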
print(tokenizer.decode([6744]))
print(tokenizer.convert_ids_to_tokens([6744]))
print(tokenizer.decode([100256]))  # does this token id not exist in the vocab?
print(tokenizer.convert_ids_to_tokens([100256]))
print(tokenizer.decode([100262]))
print(tokenizer.convert_ids_to_tokens([100262]))
print(tokenizer.decode([100273]))
print(tokenizer.convert_ids_to_tokens([100273]))