Jasondeepmusic committed on
Commit
a65da4d
1 Parent(s): 577a1c1

Training done

Browse files
added_tokens.json CHANGED
@@ -1,58 +1,22 @@
1
  {
2
- "</s_>": 57577,
3
- "</s_Doctype>": 57569,
4
- "</s_None>": 57564,
5
- "</s_client>": 57534,
6
- "</s_client_tax_id>": 57538,
7
- "</s_company_nm>": 57567,
8
- "</s_date>": 57571,
9
  "</s_header>": 57526,
10
- "</s_iban>": 57540,
11
- "</s_invoice_date>": 57530,
12
- "</s_invoice_no>": 57528,
13
- "</s_item>": 57573,
14
- "</s_item_desc>": 57544,
15
- "</s_item_gross_worth>": 57554,
16
- "</s_item_net_price>": 57548,
17
- "</s_item_net_worth>": 57550,
18
- "</s_item_qty>": 57546,
19
- "</s_item_vat>": 57552,
20
- "</s_items>": 57542,
21
- "</s_seller>": 57532,
22
- "</s_seller_tax_id>": 57536,
23
- "</s_summary>": 57556,
24
- "</s_total_gross_worth>": 57562,
25
- "</s_total_net_worth>": 57558,
26
- "</s_total_vat>": 57560,
27
- "</s_value>": 57575,
28
- "<s_>": 57576,
29
- "<s_Doctype>": 57568,
30
- "<s_None>": 57563,
31
- "<s_client>": 57533,
32
- "<s_client_tax_id>": 57537,
33
- "<s_company_nm>": 57566,
34
- "<s_cord-v2>": 57565,
35
- "<s_date>": 57570,
36
  "<s_header>": 57525,
37
- "<s_iban>": 57539,
38
  "<s_iitcdip>": 57523,
39
- "<s_invoice_date>": 57529,
40
- "<s_invoice_no>": 57527,
41
- "<s_item>": 57572,
42
- "<s_item_desc>": 57543,
43
- "<s_item_gross_worth>": 57553,
44
- "<s_item_net_price>": 57547,
45
- "<s_item_net_worth>": 57549,
46
- "<s_item_qty>": 57545,
47
- "<s_item_vat>": 57551,
48
- "<s_items>": 57541,
49
- "<s_seller>": 57531,
50
- "<s_seller_tax_id>": 57535,
51
- "<s_summary>": 57555,
52
  "<s_synthdog>": 57524,
53
- "<s_total_gross_worth>": 57561,
54
- "<s_total_net_worth>": 57557,
55
- "<s_total_vat>": 57559,
56
- "<s_value>": 57574,
57
  "<sep/>": 57522
58
  }
 
1
  {
2
+ "</s_>": 57540,
3
+ "</s_Doctype>": 57530,
4
+ "</s_company_nm>": 57528,
5
+ "</s_date>": 57532,
 
 
 
6
  "</s_header>": 57526,
7
+ "</s_item>": 57536,
8
+ "</s_items>": 57534,
9
+ "</s_value>": 57538,
10
+ "<s_>": 57539,
11
+ "<s_Doctype>": 57529,
12
+ "<s_company_nm>": 57527,
13
+ "<s_cord-v2>": 57541,
14
+ "<s_date>": 57531,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  "<s_header>": 57525,
 
16
  "<s_iitcdip>": 57523,
17
+ "<s_item>": 57535,
18
+ "<s_items>": 57533,
 
 
 
 
 
 
 
 
 
 
 
19
  "<s_synthdog>": 57524,
20
+ "<s_value>": 57537,
 
 
 
21
  "<sep/>": 57522
22
  }
preprocessor_config.json CHANGED
@@ -19,8 +19,8 @@
19
  "processor_class": "DonutProcessor",
20
  "resample": 2,
21
  "rescale_factor": 0.00392156862745098,
22
- "size": {
23
- "height": 1280,
24
- "width": 960
25
- }
26
  }
 
19
  "processor_class": "DonutProcessor",
20
  "resample": 2,
21
  "rescale_factor": 0.00392156862745098,
22
+ "size": [
23
+ 960,
24
+ 1280
25
+ ]
26
  }
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -81,7 +81,7 @@
81
  "special": false
82
  },
83
  "57527": {
84
- "content": "<s_invoice_no>",
85
  "lstrip": false,
86
  "normalized": true,
87
  "rstrip": false,
@@ -89,7 +89,7 @@
89
  "special": false
90
  },
91
  "57528": {
92
- "content": "</s_invoice_no>",
93
  "lstrip": false,
94
  "normalized": true,
95
  "rstrip": false,
@@ -97,7 +97,7 @@
97
  "special": false
98
  },
99
  "57529": {
100
- "content": "<s_invoice_date>",
101
  "lstrip": false,
102
  "normalized": true,
103
  "rstrip": false,
@@ -105,7 +105,7 @@
105
  "special": false
106
  },
107
  "57530": {
108
- "content": "</s_invoice_date>",
109
  "lstrip": false,
110
  "normalized": true,
111
  "rstrip": false,
@@ -113,7 +113,7 @@
113
  "special": false
114
  },
115
  "57531": {
116
- "content": "<s_seller>",
117
  "lstrip": false,
118
  "normalized": true,
119
  "rstrip": false,
@@ -121,7 +121,7 @@
121
  "special": false
122
  },
123
  "57532": {
124
- "content": "</s_seller>",
125
  "lstrip": false,
126
  "normalized": true,
127
  "rstrip": false,
@@ -129,7 +129,7 @@
129
  "special": false
130
  },
131
  "57533": {
132
- "content": "<s_client>",
133
  "lstrip": false,
134
  "normalized": true,
135
  "rstrip": false,
@@ -137,7 +137,7 @@
137
  "special": false
138
  },
139
  "57534": {
140
- "content": "</s_client>",
141
  "lstrip": false,
142
  "normalized": true,
143
  "rstrip": false,
@@ -145,7 +145,7 @@
145
  "special": false
146
  },
147
  "57535": {
148
- "content": "<s_seller_tax_id>",
149
  "lstrip": false,
150
  "normalized": true,
151
  "rstrip": false,
@@ -153,7 +153,7 @@
153
  "special": false
154
  },
155
  "57536": {
156
- "content": "</s_seller_tax_id>",
157
  "lstrip": false,
158
  "normalized": true,
159
  "rstrip": false,
@@ -161,7 +161,7 @@
161
  "special": false
162
  },
163
  "57537": {
164
- "content": "<s_client_tax_id>",
165
  "lstrip": false,
166
  "normalized": true,
167
  "rstrip": false,
@@ -169,7 +169,7 @@
169
  "special": false
170
  },
171
  "57538": {
172
- "content": "</s_client_tax_id>",
173
  "lstrip": false,
174
  "normalized": true,
175
  "rstrip": false,
@@ -177,7 +177,7 @@
177
  "special": false
178
  },
179
  "57539": {
180
- "content": "<s_iban>",
181
  "lstrip": false,
182
  "normalized": true,
183
  "rstrip": false,
@@ -185,7 +185,7 @@
185
  "special": false
186
  },
187
  "57540": {
188
- "content": "</s_iban>",
189
  "lstrip": false,
190
  "normalized": true,
191
  "rstrip": false,
@@ -193,300 +193,12 @@
193
  "special": false
194
  },
195
  "57541": {
196
- "content": "<s_items>",
197
- "lstrip": false,
198
- "normalized": true,
199
- "rstrip": false,
200
- "single_word": false,
201
- "special": false
202
- },
203
- "57542": {
204
- "content": "</s_items>",
205
- "lstrip": false,
206
- "normalized": true,
207
- "rstrip": false,
208
- "single_word": false,
209
- "special": false
210
- },
211
- "57543": {
212
- "content": "<s_item_desc>",
213
- "lstrip": false,
214
- "normalized": true,
215
- "rstrip": false,
216
- "single_word": false,
217
- "special": false
218
- },
219
- "57544": {
220
- "content": "</s_item_desc>",
221
- "lstrip": false,
222
- "normalized": true,
223
- "rstrip": false,
224
- "single_word": false,
225
- "special": false
226
- },
227
- "57545": {
228
- "content": "<s_item_qty>",
229
- "lstrip": false,
230
- "normalized": true,
231
- "rstrip": false,
232
- "single_word": false,
233
- "special": false
234
- },
235
- "57546": {
236
- "content": "</s_item_qty>",
237
- "lstrip": false,
238
- "normalized": true,
239
- "rstrip": false,
240
- "single_word": false,
241
- "special": false
242
- },
243
- "57547": {
244
- "content": "<s_item_net_price>",
245
- "lstrip": false,
246
- "normalized": true,
247
- "rstrip": false,
248
- "single_word": false,
249
- "special": false
250
- },
251
- "57548": {
252
- "content": "</s_item_net_price>",
253
- "lstrip": false,
254
- "normalized": true,
255
- "rstrip": false,
256
- "single_word": false,
257
- "special": false
258
- },
259
- "57549": {
260
- "content": "<s_item_net_worth>",
261
- "lstrip": false,
262
- "normalized": true,
263
- "rstrip": false,
264
- "single_word": false,
265
- "special": false
266
- },
267
- "57550": {
268
- "content": "</s_item_net_worth>",
269
- "lstrip": false,
270
- "normalized": true,
271
- "rstrip": false,
272
- "single_word": false,
273
- "special": false
274
- },
275
- "57551": {
276
- "content": "<s_item_vat>",
277
- "lstrip": false,
278
- "normalized": true,
279
- "rstrip": false,
280
- "single_word": false,
281
- "special": false
282
- },
283
- "57552": {
284
- "content": "</s_item_vat>",
285
- "lstrip": false,
286
- "normalized": true,
287
- "rstrip": false,
288
- "single_word": false,
289
- "special": false
290
- },
291
- "57553": {
292
- "content": "<s_item_gross_worth>",
293
- "lstrip": false,
294
- "normalized": true,
295
- "rstrip": false,
296
- "single_word": false,
297
- "special": false
298
- },
299
- "57554": {
300
- "content": "</s_item_gross_worth>",
301
- "lstrip": false,
302
- "normalized": true,
303
- "rstrip": false,
304
- "single_word": false,
305
- "special": false
306
- },
307
- "57555": {
308
- "content": "<s_summary>",
309
- "lstrip": false,
310
- "normalized": true,
311
- "rstrip": false,
312
- "single_word": false,
313
- "special": false
314
- },
315
- "57556": {
316
- "content": "</s_summary>",
317
- "lstrip": false,
318
- "normalized": true,
319
- "rstrip": false,
320
- "single_word": false,
321
- "special": false
322
- },
323
- "57557": {
324
- "content": "<s_total_net_worth>",
325
- "lstrip": false,
326
- "normalized": true,
327
- "rstrip": false,
328
- "single_word": false,
329
- "special": false
330
- },
331
- "57558": {
332
- "content": "</s_total_net_worth>",
333
- "lstrip": false,
334
- "normalized": true,
335
- "rstrip": false,
336
- "single_word": false,
337
- "special": false
338
- },
339
- "57559": {
340
- "content": "<s_total_vat>",
341
- "lstrip": false,
342
- "normalized": true,
343
- "rstrip": false,
344
- "single_word": false,
345
- "special": false
346
- },
347
- "57560": {
348
- "content": "</s_total_vat>",
349
- "lstrip": false,
350
- "normalized": true,
351
- "rstrip": false,
352
- "single_word": false,
353
- "special": false
354
- },
355
- "57561": {
356
- "content": "<s_total_gross_worth>",
357
- "lstrip": false,
358
- "normalized": true,
359
- "rstrip": false,
360
- "single_word": false,
361
- "special": false
362
- },
363
- "57562": {
364
- "content": "</s_total_gross_worth>",
365
- "lstrip": false,
366
- "normalized": true,
367
- "rstrip": false,
368
- "single_word": false,
369
- "special": false
370
- },
371
- "57563": {
372
- "content": "<s_None>",
373
- "lstrip": false,
374
- "normalized": true,
375
- "rstrip": false,
376
- "single_word": false,
377
- "special": false
378
- },
379
- "57564": {
380
- "content": "</s_None>",
381
- "lstrip": false,
382
- "normalized": true,
383
- "rstrip": false,
384
- "single_word": false,
385
- "special": false
386
- },
387
- "57565": {
388
  "content": "<s_cord-v2>",
389
  "lstrip": false,
390
  "normalized": true,
391
  "rstrip": false,
392
  "single_word": false,
393
  "special": false
394
- },
395
- "57566": {
396
- "content": "<s_company_nm>",
397
- "lstrip": false,
398
- "normalized": true,
399
- "rstrip": false,
400
- "single_word": false,
401
- "special": false
402
- },
403
- "57567": {
404
- "content": "</s_company_nm>",
405
- "lstrip": false,
406
- "normalized": true,
407
- "rstrip": false,
408
- "single_word": false,
409
- "special": false
410
- },
411
- "57568": {
412
- "content": "<s_Doctype>",
413
- "lstrip": false,
414
- "normalized": true,
415
- "rstrip": false,
416
- "single_word": false,
417
- "special": false
418
- },
419
- "57569": {
420
- "content": "</s_Doctype>",
421
- "lstrip": false,
422
- "normalized": true,
423
- "rstrip": false,
424
- "single_word": false,
425
- "special": false
426
- },
427
- "57570": {
428
- "content": "<s_date>",
429
- "lstrip": false,
430
- "normalized": true,
431
- "rstrip": false,
432
- "single_word": false,
433
- "special": false
434
- },
435
- "57571": {
436
- "content": "</s_date>",
437
- "lstrip": false,
438
- "normalized": true,
439
- "rstrip": false,
440
- "single_word": false,
441
- "special": false
442
- },
443
- "57572": {
444
- "content": "<s_item>",
445
- "lstrip": false,
446
- "normalized": true,
447
- "rstrip": false,
448
- "single_word": false,
449
- "special": false
450
- },
451
- "57573": {
452
- "content": "</s_item>",
453
- "lstrip": false,
454
- "normalized": true,
455
- "rstrip": false,
456
- "single_word": false,
457
- "special": false
458
- },
459
- "57574": {
460
- "content": "<s_value>",
461
- "lstrip": false,
462
- "normalized": true,
463
- "rstrip": false,
464
- "single_word": false,
465
- "special": false
466
- },
467
- "57575": {
468
- "content": "</s_value>",
469
- "lstrip": false,
470
- "normalized": true,
471
- "rstrip": false,
472
- "single_word": false,
473
- "special": false
474
- },
475
- "57576": {
476
- "content": "<s_>",
477
- "lstrip": false,
478
- "normalized": true,
479
- "rstrip": false,
480
- "single_word": false,
481
- "special": false
482
- },
483
- "57577": {
484
- "content": "</s_>",
485
- "lstrip": false,
486
- "normalized": true,
487
- "rstrip": false,
488
- "single_word": false,
489
- "special": false
490
  }
491
  },
492
  "additional_special_tokens": [
@@ -498,18 +210,11 @@
498
  "cls_token": "<s>",
499
  "eos_token": "</s>",
500
  "mask_token": "<mask>",
501
- "max_length": 768,
502
  "model_max_length": 1000000000000000019884624838656,
503
- "pad_to_multiple_of": null,
504
  "pad_token": "<pad>",
505
- "pad_token_type_id": 0,
506
- "padding_side": "right",
507
  "processor_class": "DonutProcessor",
508
  "sep_token": "</s>",
509
  "sp_model_kwargs": {},
510
- "stride": 0,
511
  "tokenizer_class": "XLMRobertaTokenizer",
512
- "truncation_side": "right",
513
- "truncation_strategy": "longest_first",
514
  "unk_token": "<unk>"
515
  }
 
81
  "special": false
82
  },
83
  "57527": {
84
+ "content": "<s_company_nm>",
85
  "lstrip": false,
86
  "normalized": true,
87
  "rstrip": false,
 
89
  "special": false
90
  },
91
  "57528": {
92
+ "content": "</s_company_nm>",
93
  "lstrip": false,
94
  "normalized": true,
95
  "rstrip": false,
 
97
  "special": false
98
  },
99
  "57529": {
100
+ "content": "<s_Doctype>",
101
  "lstrip": false,
102
  "normalized": true,
103
  "rstrip": false,
 
105
  "special": false
106
  },
107
  "57530": {
108
+ "content": "</s_Doctype>",
109
  "lstrip": false,
110
  "normalized": true,
111
  "rstrip": false,
 
113
  "special": false
114
  },
115
  "57531": {
116
+ "content": "<s_date>",
117
  "lstrip": false,
118
  "normalized": true,
119
  "rstrip": false,
 
121
  "special": false
122
  },
123
  "57532": {
124
+ "content": "</s_date>",
125
  "lstrip": false,
126
  "normalized": true,
127
  "rstrip": false,
 
129
  "special": false
130
  },
131
  "57533": {
132
+ "content": "<s_items>",
133
  "lstrip": false,
134
  "normalized": true,
135
  "rstrip": false,
 
137
  "special": false
138
  },
139
  "57534": {
140
+ "content": "</s_items>",
141
  "lstrip": false,
142
  "normalized": true,
143
  "rstrip": false,
 
145
  "special": false
146
  },
147
  "57535": {
148
+ "content": "<s_item>",
149
  "lstrip": false,
150
  "normalized": true,
151
  "rstrip": false,
 
153
  "special": false
154
  },
155
  "57536": {
156
+ "content": "</s_item>",
157
  "lstrip": false,
158
  "normalized": true,
159
  "rstrip": false,
 
161
  "special": false
162
  },
163
  "57537": {
164
+ "content": "<s_value>",
165
  "lstrip": false,
166
  "normalized": true,
167
  "rstrip": false,
 
169
  "special": false
170
  },
171
  "57538": {
172
+ "content": "</s_value>",
173
  "lstrip": false,
174
  "normalized": true,
175
  "rstrip": false,
 
177
  "special": false
178
  },
179
  "57539": {
180
+ "content": "<s_>",
181
  "lstrip": false,
182
  "normalized": true,
183
  "rstrip": false,
 
185
  "special": false
186
  },
187
  "57540": {
188
+ "content": "</s_>",
189
  "lstrip": false,
190
  "normalized": true,
191
  "rstrip": false,
 
193
  "special": false
194
  },
195
  "57541": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  "content": "<s_cord-v2>",
197
  "lstrip": false,
198
  "normalized": true,
199
  "rstrip": false,
200
  "single_word": false,
201
  "special": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202
  }
203
  },
204
  "additional_special_tokens": [
 
210
  "cls_token": "<s>",
211
  "eos_token": "</s>",
212
  "mask_token": "<mask>",
 
213
  "model_max_length": 1000000000000000019884624838656,
 
214
  "pad_token": "<pad>",
 
 
215
  "processor_class": "DonutProcessor",
216
  "sep_token": "</s>",
217
  "sp_model_kwargs": {},
 
218
  "tokenizer_class": "XLMRobertaTokenizer",
 
 
219
  "unk_token": "<unk>"
220
  }