Ammar-alhaj-ali committed on
Commit
55f9d5f
1 Parent(s): ecf2448

Upload tokenizer_config.json

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +87 -0
tokenizer_config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "apply_ocr": false,
4
+ "bos_token": {
5
+ "__type": "AddedToken",
6
+ "content": "<s>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "cls_token": {
13
+ "__type": "AddedToken",
14
+ "content": "<s>",
15
+ "lstrip": false,
16
+ "normalized": true,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "cls_token_box": [
21
+ 0,
22
+ 0,
23
+ 0,
24
+ 0
25
+ ],
26
+ "eos_token": {
27
+ "__type": "AddedToken",
28
+ "content": "</s>",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false
33
+ },
34
+ "errors": "replace",
35
+ "mask_token": {
36
+ "__type": "AddedToken",
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": true,
40
+ "rstrip": false,
41
+ "single_word": false
42
+ },
43
+ "model_max_length": 512,
44
+ "name_or_path": "microsoft/layoutlmv3-base",
45
+ "only_label_first_subword": true,
46
+ "pad_token": {
47
+ "__type": "AddedToken",
48
+ "content": "<pad>",
49
+ "lstrip": false,
50
+ "normalized": true,
51
+ "rstrip": false,
52
+ "single_word": false
53
+ },
54
+ "pad_token_box": [
55
+ 0,
56
+ 0,
57
+ 0,
58
+ 0
59
+ ],
60
+ "pad_token_label": -100,
61
+ "processor_class": "LayoutLMv3Processor",
62
+ "sep_token": {
63
+ "__type": "AddedToken",
64
+ "content": "</s>",
65
+ "lstrip": false,
66
+ "normalized": true,
67
+ "rstrip": false,
68
+ "single_word": false
69
+ },
70
+ "sep_token_box": [
71
+ 0,
72
+ 0,
73
+ 0,
74
+ 0
75
+ ],
76
+ "special_tokens_map_file": null,
77
+ "tokenizer_class": "LayoutLMv3Tokenizer",
78
+ "trim_offsets": true,
79
+ "unk_token": {
80
+ "__type": "AddedToken",
81
+ "content": "<unk>",
82
+ "lstrip": false,
83
+ "normalized": true,
84
+ "rstrip": false,
85
+ "single_word": false
86
+ }
87
+ }