barzo committed on
Commit
133018a
1 Parent(s): b8a5638

Update tokenizer_config.json

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +73 -7
tokenizer_config.json CHANGED
@@ -1,7 +1,36 @@
1
  {
2
- "bos_token": "<s>",
3
- "cls_token": "<s>",
4
- "eos_token": "</s>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "mask_token": {
6
  "__type": "AddedToken",
7
  "content": "<mask>",
@@ -11,9 +40,46 @@
11
  "single_word": false
12
  },
13
  "model_max_length": 512,
14
- "pad_token": "<pad>",
15
- "sep_token": "</s>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  "special_tokens_map_file": null,
17
- "tokenizer_class": "XLMRobertaTokenizer",
18
- "unk_token": "<unk>"
 
 
 
 
 
 
 
 
19
  }
 
1
  {
2
+ "add_prefix_space": true,
3
+ "bos_token": {
4
+ "__type": "AddedToken",
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false
10
+ },
11
+ "cls_token": {
12
+ "__type": "AddedToken",
13
+ "content": "<s>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false
18
+ },
19
+ "cls_token_box": [
20
+ 0,
21
+ 0,
22
+ 0,
23
+ 0
24
+ ],
25
+ "eos_token": {
26
+ "__type": "AddedToken",
27
+ "content": "</s>",
28
+ "lstrip": false,
29
+ "normalized": true,
30
+ "rstrip": false,
31
+ "single_word": false
32
+ },
33
+ "errors": "replace",
34
  "mask_token": {
35
  "__type": "AddedToken",
36
  "content": "<mask>",
 
40
  "single_word": false
41
  },
42
  "model_max_length": 512,
43
+ "name_or_path": "microsoft/layoutlmv3-base",
44
+ "only_label_first_subword": true,
45
+ "pad_token": {
46
+ "__type": "AddedToken",
47
+ "content": "<pad>",
48
+ "lstrip": false,
49
+ "normalized": true,
50
+ "rstrip": false,
51
+ "single_word": false
52
+ },
53
+ "pad_token_box": [
54
+ 0,
55
+ 0,
56
+ 0,
57
+ 0
58
+ ],
59
+ "pad_token_label": -100,
60
+ "sep_token": {
61
+ "__type": "AddedToken",
62
+ "content": "</s>",
63
+ "lstrip": false,
64
+ "normalized": true,
65
+ "rstrip": false,
66
+ "single_word": false
67
+ },
68
+ "sep_token_box": [
69
+ 0,
70
+ 0,
71
+ 0,
72
+ 0
73
+ ],
74
  "special_tokens_map_file": null,
75
+ "tokenizer_class": "LayoutLMv3Tokenizer",
76
+ "trim_offsets": true,
77
+ "unk_token": {
78
+ "__type": "AddedToken",
79
+ "content": "<unk>",
80
+ "lstrip": false,
81
+ "normalized": true,
82
+ "rstrip": false,
83
+ "single_word": false
84
+ }
85
  }