nielsr HF staff committed on
Commit
f933c7f
1 Parent(s): d25460e
added_tokens.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</s_class>": 57523,
3
+ "<advertisement/>": 57524,
4
+ "<budget/>": 57525,
5
+ "<email/>": 57526,
6
+ "<file_folder/>": 57527,
7
+ "<form/>": 57528,
8
+ "<handwritten/>": 57529,
9
+ "<invoice/>": 57530,
10
+ "<letter/>": 57531,
11
+ "<memo/>": 57532,
12
+ "<news_article/>": 57533,
13
+ "<presentation/>": 57534,
14
+ "<questionnaire/>": 57535,
15
+ "<resume/>": 57536,
16
+ "<s_class>": 57537,
17
+ "<s_iitcdip>": 57538,
18
+ "<s_rvlcdip>": 57539,
19
+ "<s_synthdog>": 57540,
20
+ "<scientific_publication/>": 57541,
21
+ "<scientific_report/>": 57542,
22
+ "<sep/>": 57522,
23
+ "<specification/>": 57543
24
+ }
preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_pad": true,
4
+ "do_resize_and_thumbnail": true,
5
+ "feature_extractor_type": "DonutFeatureExtractor",
6
+ "image_mean": [
7
+ 0.5,
8
+ 0.5,
9
+ 0.5
10
+ ],
11
+ "image_std": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
+ ],
16
+ "processor_class": "DonutProcessor",
17
+ "resample": 2,
18
+ "size": [
19
+ 1920,
20
+ 2560
21
+ ]
22
+ }
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb9e3dce4c326195d08fc3dd0f7e2eee1da8595c847bf4c1a9c78b7a82d47e2d
3
+ size 1296245
special_tokens_map.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "</s_class>",
4
+ "<advertisement/>",
5
+ "<budget/>",
6
+ "<email/>",
7
+ "<file_folder/>",
8
+ "<form/>",
9
+ "<handwritten/>",
10
+ "<invoice/>",
11
+ "<letter/>",
12
+ "<memo/>",
13
+ "<news_article/>",
14
+ "<presentation/>",
15
+ "<questionnaire/>",
16
+ "<resume/>",
17
+ "<s_class>",
18
+ "<s_iitcdip>",
19
+ "<s_rvlcdip>",
20
+ "<s_synthdog>",
21
+ "<scientific_publication/>",
22
+ "<scientific_report/>",
23
+ "<specification/>"
24
+ ],
25
+ "bos_token": "<s>",
26
+ "cls_token": "<s>",
27
+ "eos_token": "</s>",
28
+ "mask_token": {
29
+ "content": "<mask>",
30
+ "lstrip": true,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false
34
+ },
35
+ "pad_token": "<pad>",
36
+ "sep_token": "</s>",
37
+ "unk_token": "<unk>"
38
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "from_slow": true,
6
+ "mask_token": {
7
+ "__type": "AddedToken",
8
+ "content": "<mask>",
9
+ "lstrip": true,
10
+ "normalized": true,
11
+ "rstrip": false,
12
+ "single_word": false
13
+ },
14
+ "name_or_path": "naver-clova-ix/donut-base-finetuned-rvlcdip",
15
+ "pad_token": "<pad>",
16
+ "processor_class": "DonutProcessor",
17
+ "sep_token": "</s>",
18
+ "sp_model_kwargs": {},
19
+ "special_tokens_map_file": null,
20
+ "tokenizer_class": "XLMRobertaTokenizer",
21
+ "unk_token": "<unk>"
22
+ }