Guigadal committed on
Commit
c22ad84
1 Parent(s): 350179f

Upload processor

Browse files
.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
preprocessor_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "apply_ocr": true,
3
+ "do_resize": true,
4
+ "image_processor_type": "LayoutLMv2FeatureExtractor",
5
+ "ocr_lang": null,
6
+ "processor_class": "LayoutXLMProcessor",
7
+ "resample": 2,
8
+ "size": {
9
+ "height": 224,
10
+ "width": 224
11
+ },
12
+ "tesseract_config": ""
13
+ }
special_tokens_map.json CHANGED
@@ -5,7 +5,7 @@
5
  "mask_token": {
6
  "content": "<mask>",
7
  "lstrip": true,
8
- "normalized": true,
9
  "rstrip": false,
10
  "single_word": false
11
  },
 
5
  "mask_token": {
6
  "content": "<mask>",
7
  "lstrip": true,
8
+ "normalized": false,
9
  "rstrip": false,
10
  "single_word": false
11
  },
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:affcfb1f45c4b14a70a6589c3d153b430ed4309e5a6613a88dab64d5a923a5d6
3
+ size 17082925
tokenizer_config.json CHANGED
@@ -27,6 +27,7 @@
27
  0
28
  ],
29
  "pad_token_label": -100,
 
30
  "sep_token": "</s>",
31
  "sep_token_box": [
32
  1000,
@@ -34,7 +35,6 @@
34
  1000,
35
  1000
36
  ],
37
- "sp_model_kwargs": {},
38
  "special_tokens_map_file": null,
39
  "tokenizer_class": "LayoutXLMTokenizer",
40
  "unk_token": "<unk>"
 
27
  0
28
  ],
29
  "pad_token_label": -100,
30
+ "processor_class": "LayoutXLMProcessor",
31
  "sep_token": "</s>",
32
  "sep_token_box": [
33
  1000,
 
35
  1000,
36
  1000
37
  ],
 
38
  "special_tokens_map_file": null,
39
  "tokenizer_class": "LayoutXLMTokenizer",
40
  "unk_token": "<unk>"