sreejith8100 committed on
Commit
d598341
1 Parent(s): 090d970

End of training

Browse files
README.md CHANGED
@@ -1,6 +1,4 @@
1
  ---
2
- license: mit
3
- base_model: naver-clova-ix/donut-base
4
  tags:
5
  - generated_from_trainer
6
  datasets:
@@ -15,9 +13,9 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # donut-base-sroie
17
 
18
- This model is a fine-tuned version of [naver-clova-ix/donut-base](https://huggingface.co/naver-clova-ix/donut-base) on the imagefolder dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 1.5870
21
 
22
  ## Model description
23
 
@@ -42,18 +40,21 @@ The following hyperparameters were used during training:
42
  - seed: 42
43
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
44
  - lr_scheduler_type: linear
45
- - num_epochs: 6
46
 
47
  ### Training results
48
 
49
  | Training Loss | Epoch | Step | Validation Loss |
50
  |:-------------:|:-----:|:----:|:---------------:|
51
- | 5.0795 | 1.0 | 69 | 4.7780 |
52
- | 3.0477 | 2.0 | 138 | 2.4758 |
53
- | 1.3046 | 3.0 | 207 | 1.9672 |
54
- | 1.1231 | 4.0 | 276 | 1.6499 |
55
- | 1.8067 | 5.0 | 345 | 1.5982 |
56
- | 2.1638 | 6.0 | 414 | 1.5870 |
 
 
 
57
 
58
 
59
  ### Framework versions
 
1
  ---
 
 
2
  tags:
3
  - generated_from_trainer
4
  datasets:
 
13
 
14
  # donut-base-sroie
15
 
16
+ This model was trained from scratch on the imagefolder dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: 1.5314
19
 
20
  ## Model description
21
 
 
40
  - seed: 42
41
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
42
  - lr_scheduler_type: linear
43
+ - num_epochs: 10
44
 
45
  ### Training results
46
 
47
  | Training Loss | Epoch | Step | Validation Loss |
48
  |:-------------:|:-----:|:----:|:---------------:|
49
+ | 0.6691 | 1.0 | 69 | 1.4307 |
50
+ | 1.2955 | 2.0 | 138 | 1.4857 |
51
+ | 0.3205 | 3.0 | 207 | 1.4528 |
52
+ | 0.4622 | 4.0 | 276 | 1.3993 |
53
+ | 0.8009 | 5.0 | 345 | 1.4648 |
54
+ | 1.1553 | 6.0 | 414 | 1.4508 |
55
+ | 0.6056 | 7.0 | 483 | 1.4568 |
56
+ | 0.1881 | 8.0 | 552 | 1.4941 |
57
+ | 0.7731 | 9.0 | 621 | 1.5314 |
58
 
59
 
60
  ### Framework versions
generation_config.json CHANGED
@@ -1,8 +1,10 @@
1
  {
2
  "_from_model_config": true,
3
  "bos_token_id": 0,
 
4
  "eos_token_id": 2,
5
  "forced_eos_token_id": 2,
 
6
  "pad_token_id": 1,
7
  "transformers_version": "4.34.1"
8
  }
 
1
  {
2
  "_from_model_config": true,
3
  "bos_token_id": 0,
4
+ "decoder_start_token_id": 0,
5
  "eos_token_id": 2,
6
  "forced_eos_token_id": 2,
7
+ "max_length": 512,
8
  "pad_token_id": 1,
9
  "transformers_version": "4.34.1"
10
  }
preprocessor_config.json CHANGED
@@ -19,8 +19,8 @@
19
  "processor_class": "DonutProcessor",
20
  "resample": 2,
21
  "rescale_factor": 0.00392156862745098,
22
- "size": [
23
- 800,
24
- 1200
25
- ]
26
  }
 
19
  "processor_class": "DonutProcessor",
20
  "resample": 2,
21
  "rescale_factor": 0.00392156862745098,
22
+ "size": {
23
+ "height": 1200,
24
+ "width": 800
25
+ }
26
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd201a11060f25f66ebd5f90aad2bf8226d3847ceae6a76ae2a76ff7fb22a44f
3
  size 809400534
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:578106901a1a1bf79a2410e0af0dcb7a1bc745f0c1bcb1497c913f7b536c8b72
3
  size 809400534
special_tokens_map.json CHANGED
@@ -1,401 +1,83 @@
1
  {
2
  "additional_special_tokens": [
3
- {
4
- "content": "<s_province>",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false
9
- },
10
- {
11
- "content": "<s_mother_last_name>",
12
- "lstrip": false,
13
- "normalized": false,
14
- "rstrip": false,
15
- "single_word": false
16
- },
17
- {
18
- "content": "<s_father_last_name>",
19
- "lstrip": false,
20
- "normalized": false,
21
- "rstrip": false,
22
- "single_word": false
23
- },
24
- {
25
- "content": "</s_father_religion>",
26
- "lstrip": false,
27
- "normalized": false,
28
- "rstrip": false,
29
- "single_word": false
30
- },
31
- {
32
- "content": "<s_mother_nationality>",
33
- "lstrip": false,
34
- "normalized": false,
35
- "rstrip": false,
36
- "single_word": false
37
- },
38
- {
39
- "content": "</s_father_first_name>",
40
- "lstrip": false,
41
- "normalized": false,
42
- "rstrip": false,
43
- "single_word": false
44
- },
45
- {
46
- "content": "</s_father_occupation>",
47
- "lstrip": false,
48
- "normalized": false,
49
- "rstrip": false,
50
- "single_word": false
51
- },
52
- {
53
- "content": "</s_child_first_name>",
54
- "lstrip": false,
55
- "normalized": false,
56
- "rstrip": false,
57
- "single_word": false
58
- },
59
- {
60
- "content": "<s_mother_race>",
61
- "lstrip": false,
62
- "normalized": false,
63
- "rstrip": false,
64
- "single_word": false
65
- },
66
- {
67
- "content": "</s_mother_race>",
68
- "lstrip": false,
69
- "normalized": false,
70
- "rstrip": false,
71
- "single_word": false
72
- },
73
- {
74
- "content": "</s_city>",
75
- "lstrip": false,
76
- "normalized": false,
77
- "rstrip": false,
78
- "single_word": false
79
- },
80
- {
81
- "content": "<s_mother_religion>",
82
- "lstrip": false,
83
- "normalized": false,
84
- "rstrip": false,
85
- "single_word": false
86
- },
87
- {
88
- "content": "</s_mother_nationality>",
89
- "lstrip": false,
90
- "normalized": false,
91
- "rstrip": false,
92
- "single_word": false
93
- },
94
- {
95
- "content": "<s_father_first_name>",
96
- "lstrip": false,
97
- "normalized": false,
98
- "rstrip": false,
99
- "single_word": false
100
- },
101
- {
102
- "content": "<s_father_nationality>",
103
- "lstrip": false,
104
- "normalized": false,
105
- "rstrip": false,
106
- "single_word": false
107
- },
108
- {
109
- "content": "<s_mother-nationality>",
110
- "lstrip": false,
111
- "normalized": false,
112
- "rstrip": false,
113
- "single_word": false
114
- },
115
- {
116
- "content": "<s_father_occupaation>",
117
- "lstrip": false,
118
- "normalized": false,
119
- "rstrip": false,
120
- "single_word": false
121
- },
122
- {
123
- "content": "<s_mother_middle_name>",
124
- "lstrip": false,
125
- "normalized": false,
126
- "rstrip": false,
127
- "single_word": false
128
- },
129
- {
130
- "content": "</s_father_age>",
131
- "lstrip": false,
132
- "normalized": false,
133
- "rstrip": false,
134
- "single_word": false
135
- },
136
- {
137
- "content": "<s_child_last_name>",
138
- "lstrip": false,
139
- "normalized": false,
140
- "rstrip": false,
141
- "single_word": false
142
- },
143
- {
144
- "content": "<s_child_middle_name>",
145
- "lstrip": false,
146
- "normalized": false,
147
- "rstrip": false,
148
- "single_word": false
149
- },
150
- {
151
- "content": "<s_DOB>",
152
- "lstrip": false,
153
- "normalized": false,
154
- "rstrip": false,
155
- "single_word": false
156
- },
157
- {
158
- "content": "<s_mother_birthplace>",
159
- "lstrip": false,
160
- "normalized": false,
161
- "rstrip": false,
162
- "single_word": false
163
- },
164
- {
165
- "content": "</s_mother_last_name>",
166
- "lstrip": false,
167
- "normalized": false,
168
- "rstrip": false,
169
- "single_word": false
170
- },
171
- {
172
- "content": "<s_father_race>",
173
- "lstrip": false,
174
- "normalized": false,
175
- "rstrip": false,
176
- "single_word": false
177
- },
178
- {
179
- "content": "<s_city>",
180
- "lstrip": false,
181
- "normalized": false,
182
- "rstrip": false,
183
- "single_word": false
184
- },
185
- {
186
- "content": "</s_mother_first_name>",
187
- "lstrip": false,
188
- "normalized": false,
189
- "rstrip": false,
190
- "single_word": false
191
- },
192
- {
193
- "content": "<s>",
194
- "lstrip": false,
195
- "normalized": false,
196
- "rstrip": false,
197
- "single_word": false
198
- },
199
- {
200
- "content": "</s_mother_birthplace>",
201
- "lstrip": false,
202
- "normalized": false,
203
- "rstrip": false,
204
- "single_word": false
205
- },
206
- {
207
- "content": "</s_father_nationality>",
208
- "lstrip": false,
209
- "normalized": false,
210
- "rstrip": false,
211
- "single_word": false
212
- },
213
- {
214
- "content": "</s_mother_age>",
215
- "lstrip": false,
216
- "normalized": false,
217
- "rstrip": false,
218
- "single_word": false
219
- },
220
- {
221
- "content": "</s_father_nationalty>",
222
- "lstrip": false,
223
- "normalized": false,
224
- "rstrip": false,
225
- "single_word": false
226
- },
227
- {
228
- "content": "</s_mother-nationality>",
229
- "lstrip": false,
230
- "normalized": false,
231
- "rstrip": false,
232
- "single_word": false
233
- },
234
- {
235
- "content": "<s_child_first_name>",
236
- "lstrip": false,
237
- "normalized": false,
238
- "rstrip": false,
239
- "single_word": false
240
- },
241
- {
242
- "content": "</s_mother_middle_name>",
243
- "lstrip": false,
244
- "normalized": false,
245
- "rstrip": false,
246
- "single_word": false
247
- },
248
- {
249
- "content": "<s_father_middle_name>",
250
- "lstrip": false,
251
- "normalized": false,
252
- "rstrip": false,
253
- "single_word": false
254
- },
255
- {
256
- "content": "</s_father_race>",
257
- "lstrip": false,
258
- "normalized": false,
259
- "rstrip": false,
260
- "single_word": false
261
- },
262
- {
263
- "content": "<s_father_occupation>",
264
- "lstrip": false,
265
- "normalized": false,
266
- "rstrip": false,
267
- "single_word": false
268
- },
269
- {
270
- "content": "</s_father_last_name>",
271
- "lstrip": false,
272
- "normalized": false,
273
- "rstrip": false,
274
- "single_word": false
275
- },
276
- {
277
- "content": "</s_father_birthplace>",
278
- "lstrip": false,
279
- "normalized": false,
280
- "rstrip": false,
281
- "single_word": false
282
- },
283
- {
284
- "content": "<s_father_religion>",
285
- "lstrip": false,
286
- "normalized": false,
287
- "rstrip": false,
288
- "single_word": false
289
- },
290
- {
291
- "content": "</s_child_middle_name>",
292
- "lstrip": false,
293
- "normalized": false,
294
- "rstrip": false,
295
- "single_word": false
296
- },
297
- {
298
- "content": "</s_mother_nationalty>",
299
- "lstrip": false,
300
- "normalized": false,
301
- "rstrip": false,
302
- "single_word": false
303
- },
304
- {
305
- "content": "<s_mother_age>",
306
- "lstrip": false,
307
- "normalized": false,
308
- "rstrip": false,
309
- "single_word": false
310
- },
311
- {
312
- "content": "</s>",
313
- "lstrip": false,
314
- "normalized": false,
315
- "rstrip": false,
316
- "single_word": false
317
- },
318
- {
319
- "content": "</s_father_occupaation>",
320
- "lstrip": false,
321
- "normalized": false,
322
- "rstrip": false,
323
- "single_word": false
324
- },
325
- {
326
- "content": "<s_mother_nationalty>",
327
- "lstrip": false,
328
- "normalized": false,
329
- "rstrip": false,
330
- "single_word": false
331
- },
332
- {
333
- "content": "</s_DOB>",
334
- "lstrip": false,
335
- "normalized": false,
336
- "rstrip": false,
337
- "single_word": false
338
- },
339
- {
340
- "content": "<s_father_nationalty>",
341
- "lstrip": false,
342
- "normalized": false,
343
- "rstrip": false,
344
- "single_word": false
345
- },
346
- {
347
- "content": "<s_mother_first_name>",
348
- "lstrip": false,
349
- "normalized": false,
350
- "rstrip": false,
351
- "single_word": false
352
- },
353
- {
354
- "content": "<s_father_birthplace>",
355
- "lstrip": false,
356
- "normalized": false,
357
- "rstrip": false,
358
- "single_word": false
359
- },
360
- {
361
- "content": "</s_child_last_name>",
362
- "lstrip": false,
363
- "normalized": false,
364
- "rstrip": false,
365
- "single_word": false
366
- },
367
- {
368
- "content": "</s_mother_religion>",
369
- "lstrip": false,
370
- "normalized": false,
371
- "rstrip": false,
372
- "single_word": false
373
- },
374
- {
375
- "content": "</s_province>",
376
- "lstrip": false,
377
- "normalized": false,
378
- "rstrip": false,
379
- "single_word": false
380
- },
381
- {
382
- "content": "<s_father_age>",
383
- "lstrip": false,
384
- "normalized": false,
385
- "rstrip": false,
386
- "single_word": false
387
- },
388
- {
389
- "content": "</s_father_middle_name>",
390
- "lstrip": false,
391
- "normalized": false,
392
- "rstrip": false,
393
- "single_word": false
394
- }
395
  ],
396
- "bos_token": "<s>",
397
- "cls_token": "<s>",
398
- "eos_token": "</s>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399
  "mask_token": {
400
  "content": "<mask>",
401
  "lstrip": true,
@@ -403,7 +85,25 @@
403
  "rstrip": false,
404
  "single_word": false
405
  },
406
- "pad_token": "<pad>",
407
- "sep_token": "</s>",
408
- "unk_token": "<unk>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
409
  }
 
1
  {
2
  "additional_special_tokens": [
3
+ "<s_province>",
4
+ "<s_mother_last_name>",
5
+ "<s_father_last_name>",
6
+ "</s_father_religion>",
7
+ "<s_mother_nationality>",
8
+ "</s_father_first_name>",
9
+ "</s_father_occupation>",
10
+ "</s_child_first_name>",
11
+ "<s_mother_race>",
12
+ "</s_mother_race>",
13
+ "</s_city>",
14
+ "<s_mother_religion>",
15
+ "</s_mother_nationality>",
16
+ "<s_father_first_name>",
17
+ "<s_father_nationality>",
18
+ "<s_mother-nationality>",
19
+ "<s_father_occupaation>",
20
+ "<s_mother_middle_name>",
21
+ "</s_father_age>",
22
+ "<s_child_last_name>",
23
+ "<s_child_middle_name>",
24
+ "<s_DOB>",
25
+ "<s_mother_birthplace>",
26
+ "</s_mother_last_name>",
27
+ "<s_father_race>",
28
+ "<s_city>",
29
+ "</s_mother_first_name>",
30
+ "<s>",
31
+ "</s_mother_birthplace>",
32
+ "</s_father_nationality>",
33
+ "</s_mother_age>",
34
+ "</s_father_nationalty>",
35
+ "</s_mother-nationality>",
36
+ "<s_child_first_name>",
37
+ "</s_mother_middle_name>",
38
+ "<s_father_middle_name>",
39
+ "</s_father_race>",
40
+ "<s_father_occupation>",
41
+ "</s_father_last_name>",
42
+ "</s_father_birthplace>",
43
+ "<s_father_religion>",
44
+ "</s_child_middle_name>",
45
+ "</s_mother_nationalty>",
46
+ "<s_mother_age>",
47
+ "</s>",
48
+ "</s_father_occupaation>",
49
+ "<s_mother_nationalty>",
50
+ "</s_DOB>",
51
+ "<s_father_nationalty>",
52
+ "<s_mother_first_name>",
53
+ "<s_father_birthplace>",
54
+ "</s_child_last_name>",
55
+ "</s_mother_religion>",
56
+ "</s_province>",
57
+ "<s_father_age>",
58
+ "</s_father_middle_name>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  ],
60
+ "bos_token": {
61
+ "content": "<s>",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false
66
+ },
67
+ "cls_token": {
68
+ "content": "<s>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false
73
+ },
74
+ "eos_token": {
75
+ "content": "</s>",
76
+ "lstrip": false,
77
+ "normalized": false,
78
+ "rstrip": false,
79
+ "single_word": false
80
+ },
81
  "mask_token": {
82
  "content": "<mask>",
83
  "lstrip": true,
 
85
  "rstrip": false,
86
  "single_word": false
87
  },
88
+ "pad_token": {
89
+ "content": "<pad>",
90
+ "lstrip": false,
91
+ "normalized": false,
92
+ "rstrip": false,
93
+ "single_word": false
94
+ },
95
+ "sep_token": {
96
+ "content": "</s>",
97
+ "lstrip": false,
98
+ "normalized": false,
99
+ "rstrip": false,
100
+ "single_word": false
101
+ },
102
+ "unk_token": {
103
+ "content": "<unk>",
104
+ "lstrip": false,
105
+ "normalized": false,
106
+ "rstrip": false,
107
+ "single_word": false
108
+ }
109
  }
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -560,11 +560,18 @@
560
  "cls_token": "<s>",
561
  "eos_token": "</s>",
562
  "mask_token": "<mask>",
 
563
  "model_max_length": 1000000000000000019884624838656,
 
564
  "pad_token": "<pad>",
 
 
565
  "processor_class": "DonutProcessor",
566
  "sep_token": "</s>",
567
  "sp_model_kwargs": {},
 
568
  "tokenizer_class": "XLMRobertaTokenizer",
 
 
569
  "unk_token": "<unk>"
570
  }
 
560
  "cls_token": "<s>",
561
  "eos_token": "</s>",
562
  "mask_token": "<mask>",
563
+ "max_length": 512,
564
  "model_max_length": 1000000000000000019884624838656,
565
+ "pad_to_multiple_of": null,
566
  "pad_token": "<pad>",
567
+ "pad_token_type_id": 0,
568
+ "padding_side": "right",
569
  "processor_class": "DonutProcessor",
570
  "sep_token": "</s>",
571
  "sp_model_kwargs": {},
572
+ "stride": 0,
573
  "tokenizer_class": "XLMRobertaTokenizer",
574
+ "truncation_side": "right",
575
+ "truncation_strategy": "longest_first",
576
  "unk_token": "<unk>"
577
  }