# NOTE(review): removed scraped-page residue (file-size line, commit hash,
# stray gutter line numbers) that preceded the script and broke Python parsing.
from transformers import AutoTokenizer


# Demo: inspect how the Mongolian cased BERT tokenizer splits a sample sentence.
# use_fast=False forces the pure-Python (slow) tokenizer implementation.
tokenizer = AutoTokenizer.from_pretrained('tugstugi/bert-large-mongolian-cased', use_fast=False)


# Sample Mongolian sentence (presumably "let me know once you've deposited
# the money" — translation unverified).
test_input = "Мөнгөө тушаачихсаныхаа дараа мэдэгдээрэй"

print("input:", test_input)
# Encode once and reuse the ids — the original called tokenizer.encode() twice.
token_ids = tokenizer.encode(test_input)
print("tokenizer.encode()", token_ids)
# Pair each token id with its decoded surface form for inspection.
print("tokenizer decode", [(tokenizer.decode(token_id), token_id) for token_id in token_ids])
# Full __call__ output includes input_ids plus attention_mask/token_type_ids.
print("tokenizer()", tokenizer(test_input))