Norm committed on
Commit
fac39e5
1 Parent(s): 9cee5d1

Upload tokenizer_config.json

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +12 -0
tokenizer_config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_tokenize_postprocess": false,
3
+ "sep_token": "[SEP]",
4
+ "cls_token": "[CLS]",
5
+ "unk_token": "[UNK]",
6
+ "pad_token": "[PAD]",
7
+ "mask_token": "[MASK]",
8
+ "do_lower_case": true,
9
+ "model_max_length": 512,
10
+ "vocab_file": "/home/ysocr/data/pretrain/ernie-layoutx-base-uncased/torch_version/vocab.txt",
11
+ "sentencepiece_model_file": "/home/ysocr/data/pretrain/ernie-layoutx-base-uncased/torch_version/sentencepiece.bpe.model"
12
+ }