Flova committed on
Commit
70c1a1f
1 Parent(s): c771bea

Upload processor

Browse files
added_tokens.json ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "''": 57526,
3
+ "1": 57525,
4
+ "<s_iitcdip>": 57523,
5
+ "<s_synthdog>": 57524,
6
+ "<sep/>": 57522,
7
+ "a''1 ": 57582,
8
+ "a''16 ": 57586,
9
+ "a''2 ": 57583,
10
+ "a''4 ": 57584,
11
+ "a''8 ": 57585,
12
+ "a'1 ": 57577,
13
+ "a'16 ": 57581,
14
+ "a'2 ": 57578,
15
+ "a'4 ": 57579,
16
+ "a'8 ": 57580,
17
+ "b''1 ": 57592,
18
+ "b''16 ": 57596,
19
+ "b''2 ": 57593,
20
+ "b''4 ": 57594,
21
+ "b''8 ": 57595,
22
+ "b'1 ": 57587,
23
+ "b'16 ": 57591,
24
+ "b'2 ": 57588,
25
+ "b'4 ": 57589,
26
+ "b'8 ": 57590,
27
+ "c''1 ": 57532,
28
+ "c''16 ": 57536,
29
+ "c''2 ": 57533,
30
+ "c''4 ": 57534,
31
+ "c''8 ": 57535,
32
+ "c'1 ": 57527,
33
+ "c'16 ": 57531,
34
+ "c'2 ": 57528,
35
+ "c'4 ": 57529,
36
+ "c'8 ": 57530,
37
+ "d''1 ": 57542,
38
+ "d''16 ": 57546,
39
+ "d''2 ": 57543,
40
+ "d''4 ": 57544,
41
+ "d''8 ": 57545,
42
+ "d'1 ": 57537,
43
+ "d'16 ": 57541,
44
+ "d'2 ": 57538,
45
+ "d'4 ": 57539,
46
+ "d'8 ": 57540,
47
+ "e''1 ": 57552,
48
+ "e''16 ": 57556,
49
+ "e''2 ": 57553,
50
+ "e''4 ": 57554,
51
+ "e''8 ": 57555,
52
+ "e'1 ": 57547,
53
+ "e'16 ": 57551,
54
+ "e'2 ": 57548,
55
+ "e'4 ": 57549,
56
+ "e'8 ": 57550,
57
+ "f''1 ": 57562,
58
+ "f''16 ": 57566,
59
+ "f''2 ": 57563,
60
+ "f''4 ": 57564,
61
+ "f''8 ": 57565,
62
+ "f'1 ": 57557,
63
+ "f'16 ": 57561,
64
+ "f'2 ": 57558,
65
+ "f'4 ": 57559,
66
+ "f'8 ": 57560,
67
+ "g''1 ": 57572,
68
+ "g''16 ": 57576,
69
+ "g''2 ": 57573,
70
+ "g''4 ": 57574,
71
+ "g''8 ": 57575,
72
+ "g'1 ": 57567,
73
+ "g'16 ": 57571,
74
+ "g'2 ": 57568,
75
+ "g'4 ": 57569,
76
+ "g'8 ": 57570,
77
+ "r1 ": 57597,
78
+ "r16 ": 57601,
79
+ "r2 ": 57598,
80
+ "r4 ": 57599,
81
+ "r8 ": 57600
82
+ }
preprocessor_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_align_long_axis": false,
3
+ "do_normalize": true,
4
+ "do_pad": true,
5
+ "do_rescale": true,
6
+ "do_resize": true,
7
+ "do_thumbnail": true,
8
+ "image_mean": [
9
+ 0.5,
10
+ 0.5,
11
+ 0.5
12
+ ],
13
+ "image_processor_type": "DonutImageProcessor",
14
+ "image_std": [
15
+ 0.5,
16
+ 0.5,
17
+ 0.5
18
+ ],
19
+ "processor_class": "DonutProcessor",
20
+ "resample": 2,
21
+ "rescale_factor": 0.00392156862745098,
22
+ "size": {
23
+ "height": 583,
24
+ "width": 409
25
+ }
26
+ }
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb9e3dce4c326195d08fc3dd0f7e2eee1da8595c847bf4c1a9c78b7a82d47e2d
3
+ size 1296245
special_tokens_map.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<s_iitcdip>",
4
+ "<s_synthdog>"
5
+ ],
6
+ "bos_token": "<s>",
7
+ "cls_token": "<s>",
8
+ "eos_token": "</s>",
9
+ "mask_token": {
10
+ "content": "<mask>",
11
+ "lstrip": true,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<pad>",
17
+ "sep_token": "</s>",
18
+ "unk_token": "<unk>"
19
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "__type": "AddedToken",
7
+ "content": "<mask>",
8
+ "lstrip": true,
9
+ "normalized": true,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "model_max_length": 1000000000000000019884624838656,
14
+ "name_or_path": "./model_5",
15
+ "pad_token": "<pad>",
16
+ "processor_class": "DonutProcessor",
17
+ "sep_token": "</s>",
18
+ "sp_model_kwargs": {},
19
+ "special_tokens_map_file": null,
20
+ "tokenizer_class": "XLMRobertaTokenizer",
21
+ "unk_token": "<unk>"
22
+ }