subhavarshith
committed on
Commit
•
53e4a1e
1
Parent(s):
a36dea7
Training done
Browse files
- preprocessor_config.json +4 -4
- special_tokens_map.json +2 -2
- tokenizer.json +0 -0
- tokenizer_config.json +8 -1
preprocessor_config.json
CHANGED
@@ -37,8 +37,8 @@
|
|
37 |
"processor_class": "DonutProcessor",
|
38 |
"resample": 2,
|
39 |
"rescale_factor": 0.00392156862745098,
|
40 |
-
"size":
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
}
|
|
|
37 |
"processor_class": "DonutProcessor",
|
38 |
"resample": 2,
|
39 |
"rescale_factor": 0.00392156862745098,
|
40 |
+
"size": {
|
41 |
+
"height": 1280,
|
42 |
+
"width": 960
|
43 |
+
}
|
44 |
}
|
special_tokens_map.json
CHANGED
@@ -6,14 +6,14 @@
|
|
6 |
"bos_token": {
|
7 |
"content": "<s>",
|
8 |
"lstrip": false,
|
9 |
-
"normalized":
|
10 |
"rstrip": false,
|
11 |
"single_word": false
|
12 |
},
|
13 |
"cls_token": {
|
14 |
"content": "<s>",
|
15 |
"lstrip": false,
|
16 |
-
"normalized":
|
17 |
"rstrip": false,
|
18 |
"single_word": false
|
19 |
},
|
|
|
6 |
"bos_token": {
|
7 |
"content": "<s>",
|
8 |
"lstrip": false,
|
9 |
+
"normalized": true,
|
10 |
"rstrip": false,
|
11 |
"single_word": false
|
12 |
},
|
13 |
"cls_token": {
|
14 |
"content": "<s>",
|
15 |
"lstrip": false,
|
16 |
+
"normalized": true,
|
17 |
"rstrip": false,
|
18 |
"single_word": false
|
19 |
},
|
tokenizer.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
CHANGED
@@ -6,7 +6,7 @@
|
|
6 |
"normalized": true,
|
7 |
"rstrip": false,
|
8 |
"single_word": false,
|
9 |
-
"special":
|
10 |
},
|
11 |
"1": {
|
12 |
"content": "<pad>",
|
@@ -346,11 +346,18 @@
|
|
346 |
"cls_token": "<s>",
|
347 |
"eos_token": "</s>",
|
348 |
"mask_token": "<mask>",
|
|
|
349 |
"model_max_length": 1000000000000000019884624838656,
|
|
|
350 |
"pad_token": "<pad>",
|
|
|
|
|
351 |
"processor_class": "DonutProcessor",
|
352 |
"sep_token": "</s>",
|
353 |
"sp_model_kwargs": {},
|
|
|
354 |
"tokenizer_class": "XLMRobertaTokenizer",
|
|
|
|
|
355 |
"unk_token": "<unk>"
|
356 |
}
|
|
|
6 |
"normalized": true,
|
7 |
"rstrip": false,
|
8 |
"single_word": false,
|
9 |
+
"special": true
|
10 |
},
|
11 |
"1": {
|
12 |
"content": "<pad>",
|
|
|
346 |
"cls_token": "<s>",
|
347 |
"eos_token": "</s>",
|
348 |
"mask_token": "<mask>",
|
349 |
+
"max_length": 768,
|
350 |
"model_max_length": 1000000000000000019884624838656,
|
351 |
+
"pad_to_multiple_of": null,
|
352 |
"pad_token": "<pad>",
|
353 |
+
"pad_token_type_id": 0,
|
354 |
+
"padding_side": "right",
|
355 |
"processor_class": "DonutProcessor",
|
356 |
"sep_token": "</s>",
|
357 |
"sp_model_kwargs": {},
|
358 |
+
"stride": 0,
|
359 |
"tokenizer_class": "XLMRobertaTokenizer",
|
360 |
+
"truncation_side": "right",
|
361 |
+
"truncation_strategy": "longest_first",
|
362 |
"unk_token": "<unk>"
|
363 |
}
|