bayartsogt committed on
Commit
0afb4f9
1 Parent(s): 3fff62f

trust_remote_code=True

Browse files
Files changed (2) hide show
  1. app.py +1 -1
  2. test_transformers.py +12 -0
app.py CHANGED
@@ -19,7 +19,7 @@ openai_tokenizer_list = [
19
 
20
  # load tokenizers
21
  hf_tokenizers = [
22
- AutoTokenizer.from_pretrained(model_name_or_id, use_fast=use_fast)
23
  for model_name_or_id, use_fast in hf_tokenizer_list
24
  ]
25
 
 
19
 
20
  # load tokenizers
21
  hf_tokenizers = [
22
+ AutoTokenizer.from_pretrained(model_name_or_id, use_fast=use_fast, trust_remote_code=True)
23
  for model_name_or_id, use_fast in hf_tokenizer_list
24
  ]
25
 
test_transformers.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoTokenizer
2
+
3
+
4
+ tokenizer = AutoTokenizer.from_pretrained('tugstugi/bert-large-mongolian-cased', use_fast=False)
5
+
6
+
7
+ test_input = "Мөнгөө тушаачихсаныхаа дараа мэдэгдээрэй"
8
+
9
+ print("input:", test_input)
10
+ print("tokenizer.encode()", tokenizer.encode(test_input))
11
+ print("tokenizer decode", [(tokenizer.decode(token_id), token_id) for token_id in tokenizer.encode(test_input)])
12
+ print("tokenizer()", tokenizer(test_input))