sreejith8100 committed on
Commit
8986a0d
1 Parent(s): ab85f8f

End of training

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  tags:
3
  - generated_from_trainer
4
  datasets:
@@ -13,9 +15,9 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  # donut-base-sroie
15
 
16
- This model was trained from scratch on the imagefolder dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 1.5314
19
 
20
  ## Model description
21
 
@@ -40,21 +42,22 @@ The following hyperparameters were used during training:
40
  - seed: 42
41
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
42
  - lr_scheduler_type: linear
43
- - num_epochs: 10
44
 
45
  ### Training results
46
 
47
  | Training Loss | Epoch | Step | Validation Loss |
48
  |:-------------:|:-----:|:----:|:---------------:|
49
- | 0.6691 | 1.0 | 69 | 1.4307 |
50
- | 1.2955 | 2.0 | 138 | 1.4857 |
51
- | 0.3205 | 3.0 | 207 | 1.4528 |
52
- | 0.4622 | 4.0 | 276 | 1.3993 |
53
- | 0.8009 | 5.0 | 345 | 1.4648 |
54
- | 1.1553 | 6.0 | 414 | 1.4508 |
55
- | 0.6056 | 7.0 | 483 | 1.4568 |
56
- | 0.1881 | 8.0 | 552 | 1.4941 |
57
- | 0.7731 | 9.0 | 621 | 1.5314 |
 
58
 
59
 
60
  ### Framework versions
 
1
  ---
2
+ license: mit
3
+ base_model: naver-clova-ix/donut-base
4
  tags:
5
  - generated_from_trainer
6
  datasets:
 
15
 
16
  # donut-base-sroie
17
 
18
+ This model is a fine-tuned version of [naver-clova-ix/donut-base](https://huggingface.co/naver-clova-ix/donut-base) on the imagefolder dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 1.2436
21
 
22
  ## Model description
23
 
 
42
  - seed: 42
43
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
44
  - lr_scheduler_type: linear
45
+ - num_epochs: 20
46
 
47
  ### Training results
48
 
49
  | Training Loss | Epoch | Step | Validation Loss |
50
  |:-------------:|:-----:|:----:|:---------------:|
51
+ | 4.3512 | 1.0 | 73 | 3.7961 |
52
+ | 2.0326 | 2.0 | 146 | 1.9182 |
53
+ | 1.2065 | 3.0 | 219 | 1.4062 |
54
+ | 1.5587 | 4.0 | 292 | 1.2381 |
55
+ | 0.7234 | 5.0 | 365 | 1.2256 |
56
+ | 0.9119 | 6.0 | 438 | 1.2304 |
57
+ | 0.3453 | 7.0 | 511 | 1.2262 |
58
+ | 0.7999 | 8.0 | 584 | 1.2491 |
59
+ | 0.216 | 9.0 | 657 | 1.2270 |
60
+ | 0.5934 | 10.0 | 730 | 1.2436 |
61
 
62
 
63
  ### Framework versions
generation_config.json CHANGED
@@ -1,10 +1,8 @@
1
  {
2
  "_from_model_config": true,
3
  "bos_token_id": 0,
4
- "decoder_start_token_id": 0,
5
  "eos_token_id": 2,
6
  "forced_eos_token_id": 2,
7
- "max_length": 512,
8
  "pad_token_id": 1,
9
  "transformers_version": "4.34.1"
10
  }
 
1
  {
2
  "_from_model_config": true,
3
  "bos_token_id": 0,
 
4
  "eos_token_id": 2,
5
  "forced_eos_token_id": 2,
 
6
  "pad_token_id": 1,
7
  "transformers_version": "4.34.1"
8
  }
preprocessor_config.json CHANGED
@@ -19,8 +19,8 @@
19
  "processor_class": "DonutProcessor",
20
  "resample": 2,
21
  "rescale_factor": 0.00392156862745098,
22
- "size": {
23
- "height": 1200,
24
- "width": 800
25
- }
26
  }
 
19
  "processor_class": "DonutProcessor",
20
  "resample": 2,
21
  "rescale_factor": 0.00392156862745098,
22
+ "size": [
23
+ 800,
24
+ 1200
25
+ ]
26
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48513d40f4b138655145e43b687d8d014fc2cc3b5d24b5f911e46a500ec348ef
3
  size 809400534
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:940209738361fc6be26a8a76a96c335a92435a5e3af0fedf6b328694ed4cf24d
3
  size 809400534
special_tokens_map.json CHANGED
@@ -1,83 +1,401 @@
1
  {
2
  "additional_special_tokens": [
3
- "<s_province>",
4
- "<s_mother_last_name>",
5
- "<s_father_last_name>",
6
- "</s_father_religion>",
7
- "<s_mother_nationality>",
8
- "</s_father_first_name>",
9
- "</s_father_occupation>",
10
- "</s_child_first_name>",
11
- "<s_mother_race>",
12
- "</s_mother_race>",
13
- "</s_city>",
14
- "<s_mother_religion>",
15
- "</s_mother_nationality>",
16
- "<s_father_first_name>",
17
- "<s_father_nationality>",
18
- "<s_mother-nationality>",
19
- "<s_father_occupaation>",
20
- "<s_mother_middle_name>",
21
- "</s_father_age>",
22
- "<s_child_last_name>",
23
- "<s_child_middle_name>",
24
- "<s_DOB>",
25
- "<s_mother_birthplace>",
26
- "</s_mother_last_name>",
27
- "<s_father_race>",
28
- "<s_city>",
29
- "</s_mother_first_name>",
30
- "<s>",
31
- "</s_mother_birthplace>",
32
- "</s_father_nationality>",
33
- "</s_mother_age>",
34
- "</s_father_nationalty>",
35
- "</s_mother-nationality>",
36
- "<s_child_first_name>",
37
- "</s_mother_middle_name>",
38
- "<s_father_middle_name>",
39
- "</s_father_race>",
40
- "<s_father_occupation>",
41
- "</s_father_last_name>",
42
- "</s_father_birthplace>",
43
- "<s_father_religion>",
44
- "</s_child_middle_name>",
45
- "</s_mother_nationalty>",
46
- "<s_mother_age>",
47
- "</s>",
48
- "</s_father_occupaation>",
49
- "<s_mother_nationalty>",
50
- "</s_DOB>",
51
- "<s_father_nationalty>",
52
- "<s_mother_first_name>",
53
- "<s_father_birthplace>",
54
- "</s_child_last_name>",
55
- "</s_mother_religion>",
56
- "</s_province>",
57
- "<s_father_age>",
58
- "</s_father_middle_name>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  ],
60
- "bos_token": {
61
- "content": "<s>",
62
- "lstrip": false,
63
- "normalized": false,
64
- "rstrip": false,
65
- "single_word": false
66
- },
67
- "cls_token": {
68
- "content": "<s>",
69
- "lstrip": false,
70
- "normalized": false,
71
- "rstrip": false,
72
- "single_word": false
73
- },
74
- "eos_token": {
75
- "content": "</s>",
76
- "lstrip": false,
77
- "normalized": false,
78
- "rstrip": false,
79
- "single_word": false
80
- },
81
  "mask_token": {
82
  "content": "<mask>",
83
  "lstrip": true,
@@ -85,25 +403,7 @@
85
  "rstrip": false,
86
  "single_word": false
87
  },
88
- "pad_token": {
89
- "content": "<pad>",
90
- "lstrip": false,
91
- "normalized": false,
92
- "rstrip": false,
93
- "single_word": false
94
- },
95
- "sep_token": {
96
- "content": "</s>",
97
- "lstrip": false,
98
- "normalized": false,
99
- "rstrip": false,
100
- "single_word": false
101
- },
102
- "unk_token": {
103
- "content": "<unk>",
104
- "lstrip": false,
105
- "normalized": false,
106
- "rstrip": false,
107
- "single_word": false
108
- }
109
  }
 
1
  {
2
  "additional_special_tokens": [
3
+ {
4
+ "content": "<s_province>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "<s_mother_last_name>",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ },
17
+ {
18
+ "content": "<s_father_last_name>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ {
25
+ "content": "</s_father_religion>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ },
31
+ {
32
+ "content": "<s_mother_nationality>",
33
+ "lstrip": false,
34
+ "normalized": false,
35
+ "rstrip": false,
36
+ "single_word": false
37
+ },
38
+ {
39
+ "content": "</s_father_first_name>",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": false,
43
+ "single_word": false
44
+ },
45
+ {
46
+ "content": "</s_father_occupation>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false
51
+ },
52
+ {
53
+ "content": "</s_child_first_name>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false
58
+ },
59
+ {
60
+ "content": "<s_mother_race>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false
65
+ },
66
+ {
67
+ "content": "</s_mother_race>",
68
+ "lstrip": false,
69
+ "normalized": false,
70
+ "rstrip": false,
71
+ "single_word": false
72
+ },
73
+ {
74
+ "content": "</s_city>",
75
+ "lstrip": false,
76
+ "normalized": false,
77
+ "rstrip": false,
78
+ "single_word": false
79
+ },
80
+ {
81
+ "content": "<s_mother_religion>",
82
+ "lstrip": false,
83
+ "normalized": false,
84
+ "rstrip": false,
85
+ "single_word": false
86
+ },
87
+ {
88
+ "content": "</s_mother_nationality>",
89
+ "lstrip": false,
90
+ "normalized": false,
91
+ "rstrip": false,
92
+ "single_word": false
93
+ },
94
+ {
95
+ "content": "<s_father_first_name>",
96
+ "lstrip": false,
97
+ "normalized": false,
98
+ "rstrip": false,
99
+ "single_word": false
100
+ },
101
+ {
102
+ "content": "<s_father_nationality>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false
107
+ },
108
+ {
109
+ "content": "<s_mother-nationality>",
110
+ "lstrip": false,
111
+ "normalized": false,
112
+ "rstrip": false,
113
+ "single_word": false
114
+ },
115
+ {
116
+ "content": "<s_father_occupaation>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false
121
+ },
122
+ {
123
+ "content": "<s_mother_middle_name>",
124
+ "lstrip": false,
125
+ "normalized": false,
126
+ "rstrip": false,
127
+ "single_word": false
128
+ },
129
+ {
130
+ "content": "</s_father_age>",
131
+ "lstrip": false,
132
+ "normalized": false,
133
+ "rstrip": false,
134
+ "single_word": false
135
+ },
136
+ {
137
+ "content": "<s_child_last_name>",
138
+ "lstrip": false,
139
+ "normalized": false,
140
+ "rstrip": false,
141
+ "single_word": false
142
+ },
143
+ {
144
+ "content": "<s_child_middle_name>",
145
+ "lstrip": false,
146
+ "normalized": false,
147
+ "rstrip": false,
148
+ "single_word": false
149
+ },
150
+ {
151
+ "content": "<s_DOB>",
152
+ "lstrip": false,
153
+ "normalized": false,
154
+ "rstrip": false,
155
+ "single_word": false
156
+ },
157
+ {
158
+ "content": "<s_mother_birthplace>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false
163
+ },
164
+ {
165
+ "content": "</s_mother_last_name>",
166
+ "lstrip": false,
167
+ "normalized": false,
168
+ "rstrip": false,
169
+ "single_word": false
170
+ },
171
+ {
172
+ "content": "<s_father_race>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false
177
+ },
178
+ {
179
+ "content": "<s_city>",
180
+ "lstrip": false,
181
+ "normalized": false,
182
+ "rstrip": false,
183
+ "single_word": false
184
+ },
185
+ {
186
+ "content": "</s_mother_first_name>",
187
+ "lstrip": false,
188
+ "normalized": false,
189
+ "rstrip": false,
190
+ "single_word": false
191
+ },
192
+ {
193
+ "content": "<s>",
194
+ "lstrip": false,
195
+ "normalized": false,
196
+ "rstrip": false,
197
+ "single_word": false
198
+ },
199
+ {
200
+ "content": "</s_mother_birthplace>",
201
+ "lstrip": false,
202
+ "normalized": false,
203
+ "rstrip": false,
204
+ "single_word": false
205
+ },
206
+ {
207
+ "content": "</s_father_nationality>",
208
+ "lstrip": false,
209
+ "normalized": false,
210
+ "rstrip": false,
211
+ "single_word": false
212
+ },
213
+ {
214
+ "content": "</s_mother_age>",
215
+ "lstrip": false,
216
+ "normalized": false,
217
+ "rstrip": false,
218
+ "single_word": false
219
+ },
220
+ {
221
+ "content": "</s_father_nationalty>",
222
+ "lstrip": false,
223
+ "normalized": false,
224
+ "rstrip": false,
225
+ "single_word": false
226
+ },
227
+ {
228
+ "content": "</s_mother-nationality>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false
233
+ },
234
+ {
235
+ "content": "<s_child_first_name>",
236
+ "lstrip": false,
237
+ "normalized": false,
238
+ "rstrip": false,
239
+ "single_word": false
240
+ },
241
+ {
242
+ "content": "</s_mother_middle_name>",
243
+ "lstrip": false,
244
+ "normalized": false,
245
+ "rstrip": false,
246
+ "single_word": false
247
+ },
248
+ {
249
+ "content": "<s_father_middle_name>",
250
+ "lstrip": false,
251
+ "normalized": false,
252
+ "rstrip": false,
253
+ "single_word": false
254
+ },
255
+ {
256
+ "content": "</s_father_race>",
257
+ "lstrip": false,
258
+ "normalized": false,
259
+ "rstrip": false,
260
+ "single_word": false
261
+ },
262
+ {
263
+ "content": "<s_father_occupation>",
264
+ "lstrip": false,
265
+ "normalized": false,
266
+ "rstrip": false,
267
+ "single_word": false
268
+ },
269
+ {
270
+ "content": "</s_father_last_name>",
271
+ "lstrip": false,
272
+ "normalized": false,
273
+ "rstrip": false,
274
+ "single_word": false
275
+ },
276
+ {
277
+ "content": "</s_father_birthplace>",
278
+ "lstrip": false,
279
+ "normalized": false,
280
+ "rstrip": false,
281
+ "single_word": false
282
+ },
283
+ {
284
+ "content": "<s_father_religion>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false
289
+ },
290
+ {
291
+ "content": "</s_child_middle_name>",
292
+ "lstrip": false,
293
+ "normalized": false,
294
+ "rstrip": false,
295
+ "single_word": false
296
+ },
297
+ {
298
+ "content": "</s_mother_nationalty>",
299
+ "lstrip": false,
300
+ "normalized": false,
301
+ "rstrip": false,
302
+ "single_word": false
303
+ },
304
+ {
305
+ "content": "<s_mother_age>",
306
+ "lstrip": false,
307
+ "normalized": false,
308
+ "rstrip": false,
309
+ "single_word": false
310
+ },
311
+ {
312
+ "content": "</s>",
313
+ "lstrip": false,
314
+ "normalized": false,
315
+ "rstrip": false,
316
+ "single_word": false
317
+ },
318
+ {
319
+ "content": "</s_father_occupaation>",
320
+ "lstrip": false,
321
+ "normalized": false,
322
+ "rstrip": false,
323
+ "single_word": false
324
+ },
325
+ {
326
+ "content": "<s_mother_nationalty>",
327
+ "lstrip": false,
328
+ "normalized": false,
329
+ "rstrip": false,
330
+ "single_word": false
331
+ },
332
+ {
333
+ "content": "</s_DOB>",
334
+ "lstrip": false,
335
+ "normalized": false,
336
+ "rstrip": false,
337
+ "single_word": false
338
+ },
339
+ {
340
+ "content": "<s_father_nationalty>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false
345
+ },
346
+ {
347
+ "content": "<s_mother_first_name>",
348
+ "lstrip": false,
349
+ "normalized": false,
350
+ "rstrip": false,
351
+ "single_word": false
352
+ },
353
+ {
354
+ "content": "<s_father_birthplace>",
355
+ "lstrip": false,
356
+ "normalized": false,
357
+ "rstrip": false,
358
+ "single_word": false
359
+ },
360
+ {
361
+ "content": "</s_child_last_name>",
362
+ "lstrip": false,
363
+ "normalized": false,
364
+ "rstrip": false,
365
+ "single_word": false
366
+ },
367
+ {
368
+ "content": "</s_mother_religion>",
369
+ "lstrip": false,
370
+ "normalized": false,
371
+ "rstrip": false,
372
+ "single_word": false
373
+ },
374
+ {
375
+ "content": "</s_province>",
376
+ "lstrip": false,
377
+ "normalized": false,
378
+ "rstrip": false,
379
+ "single_word": false
380
+ },
381
+ {
382
+ "content": "<s_father_age>",
383
+ "lstrip": false,
384
+ "normalized": false,
385
+ "rstrip": false,
386
+ "single_word": false
387
+ },
388
+ {
389
+ "content": "</s_father_middle_name>",
390
+ "lstrip": false,
391
+ "normalized": false,
392
+ "rstrip": false,
393
+ "single_word": false
394
+ }
395
  ],
396
+ "bos_token": "<s>",
397
+ "cls_token": "<s>",
398
+ "eos_token": "</s>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399
  "mask_token": {
400
  "content": "<mask>",
401
  "lstrip": true,
 
403
  "rstrip": false,
404
  "single_word": false
405
  },
406
+ "pad_token": "<pad>",
407
+ "sep_token": "</s>",
408
+ "unk_token": "<unk>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
409
  }
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -560,18 +560,11 @@
560
  "cls_token": "<s>",
561
  "eos_token": "</s>",
562
  "mask_token": "<mask>",
563
- "max_length": 512,
564
  "model_max_length": 1000000000000000019884624838656,
565
- "pad_to_multiple_of": null,
566
  "pad_token": "<pad>",
567
- "pad_token_type_id": 0,
568
- "padding_side": "right",
569
  "processor_class": "DonutProcessor",
570
  "sep_token": "</s>",
571
  "sp_model_kwargs": {},
572
- "stride": 0,
573
  "tokenizer_class": "XLMRobertaTokenizer",
574
- "truncation_side": "right",
575
- "truncation_strategy": "longest_first",
576
  "unk_token": "<unk>"
577
  }
 
560
  "cls_token": "<s>",
561
  "eos_token": "</s>",
562
  "mask_token": "<mask>",
 
563
  "model_max_length": 1000000000000000019884624838656,
 
564
  "pad_token": "<pad>",
 
 
565
  "processor_class": "DonutProcessor",
566
  "sep_token": "</s>",
567
  "sp_model_kwargs": {},
 
568
  "tokenizer_class": "XLMRobertaTokenizer",
 
 
569
  "unk_token": "<unk>"
570
  }