samleeasus committed on
Commit
966a300
1 Parent(s): c3a8fbf

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +1 -2
README.md CHANGED
@@ -10,11 +10,10 @@ tokenizer = LlamaTokenizer.from_pretrained(
10
  )
11
 
12
  print('vocab size:',tokenizer.vocab_size)
13
- #vocab size: 52992
14
 
15
  text = '今天天氣真好！'
16
 
17
- print([k for k, v in tokenizer.get_vocab().items() if v > tokenizer.vocab_size -7])
18
 
19
  print(tokenizer.tokenize(text))
20
  #['▁', '今天', '天氣', '真', '好', '<0xEF>', '<0xBC>', '<0x81>']
 
10
  )
11
 
12
  print('vocab size:',tokenizer.vocab_size)
13
+ #vocab size: 52928
14
 
15
  text = '今天天氣真好！'
16
 
 
17
 
18
  print(tokenizer.tokenize(text))
19
  #['▁', '今天', '天氣', '真', '好', '<0xEF>', '<0xBC>', '<0x81>']