# Probe the gpt-3.5-turbo tokenizer: encode a Chinese sentence, then
# inspect a few individual token ids, including ids that may be unmapped.
from vocab.gpt_35_turbo import tokenizer


text = "你好,请告诉我聚乙烯是什么"  # "Hello, please tell me what polyethylene is"
encoding = tokenizer.encode(text)
print(encoding)  # the token ids for the input text


# A regular token id: should decode cleanly.
print(tokenizer.decode([6744]))
print(tokenizer.convert_ids_to_tokens([6744]))

print(tokenizer.decode([100256]))   # does this token not exist?
print(tokenizer.convert_ids_to_tokens([100256]))


# More ids from the range just above the regular vocabulary.
print(tokenizer.decode([100262]))
print(tokenizer.convert_ids_to_tokens([100262]))

print(tokenizer.decode([100273]))
print(tokenizer.convert_ids_to_tokens([100273]))
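

# For reference, a minimal sketch probing the same ids with tiktoken directly,
# rather than through the repo's vocab.gpt_35_turbo wrapper. Assumptions (not
# from the original script): tiktoken is installed, and gpt-3.5-turbo uses the
# cl100k_base encoding.
import tiktoken

enc = tiktoken.get_encoding("cl100k_base")
for token_id in [6744, 100256, 100262, 100273]:
    try:
        # decode_single_token_bytes raises KeyError for ids with no entry.
        print(token_id, enc.decode_single_token_bytes(token_id))
    except KeyError:
        # cl100k_base appears to leave ids 100256 and 100261-100275 unmapped:
        # regular BPE tokens end at 100255, and the special tokens occupy
        # 100257-100260 and 100276, leaving gaps in between.
        print(token_id, "-> no such token in cl100k_base")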