AdamCodd committed on
Commit
b20b842
1 Parent(s): 5771226

Upload 9 files

Browse files
added_tokens.json CHANGED
@@ -1,36 +1,39 @@
1
  {
2
  "</s_date>": 57532,
3
- "</s_ignore>": 57542,
4
- "</s_item_key>": 57548,
5
- "</s_item_name>": 57550,
6
- "</s_item_quantity>": 57554,
7
- "</s_item_value>": 57552,
8
- "</s_line_items>": 57546,
 
 
9
  "</s_store_addr>": 57528,
10
  "</s_store_name>": 57526,
11
  "</s_subtotal>": 57536,
12
- "</s_tax>": 57538,
13
- "</s_telephone>": 57530,
14
  "</s_time>": 57534,
15
  "</s_tips>": 57544,
16
- "</s_total>": 57540,
17
- "<s_cord-v2>": 57555,
18
  "<s_date>": 57531,
19
- "<s_ignore>": 57541,
20
  "<s_iitcdip>": 57523,
21
- "<s_item_key>": 57547,
22
- "<s_item_name>": 57549,
23
- "<s_item_quantity>": 57553,
24
- "<s_item_value>": 57551,
25
- "<s_line_items>": 57545,
 
 
26
  "<s_store_addr>": 57527,
27
  "<s_store_name>": 57525,
28
  "<s_subtotal>": 57535,
 
29
  "<s_synthdog>": 57524,
30
- "<s_tax>": 57537,
31
- "<s_telephone>": 57529,
32
  "<s_time>": 57533,
33
  "<s_tips>": 57543,
34
- "<s_total>": 57539,
35
  "<sep/>": 57522
36
  }
 
1
  {
2
  "</s_date>": 57532,
3
+ "</s_discount>": 57546,
4
+ "</s_item_key>": 57550,
5
+ "</s_item_name>": 57552,
6
+ "</s_item_quantity>": 57556,
7
+ "</s_item_value>": 57554,
8
+ "</s_line_items>": 57548,
9
+ "</s_phone>": 57530,
10
+ "</s_receipt>": 57558,
11
  "</s_store_addr>": 57528,
12
  "</s_store_name>": 57526,
13
  "</s_subtotal>": 57536,
14
+ "</s_svc>": 57538,
15
+ "</s_tax>": 57540,
16
  "</s_time>": 57534,
17
  "</s_tips>": 57544,
18
+ "</s_total>": 57542,
 
19
  "<s_date>": 57531,
20
+ "<s_discount>": 57545,
21
  "<s_iitcdip>": 57523,
22
+ "<s_item_key>": 57549,
23
+ "<s_item_name>": 57551,
24
+ "<s_item_quantity>": 57555,
25
+ "<s_item_value>": 57553,
26
+ "<s_line_items>": 57547,
27
+ "<s_phone>": 57529,
28
+ "<s_receipt>": 57557,
29
  "<s_store_addr>": 57527,
30
  "<s_store_name>": 57525,
31
  "<s_subtotal>": 57535,
32
+ "<s_svc>": 57537,
33
  "<s_synthdog>": 57524,
34
+ "<s_tax>": 57539,
 
35
  "<s_time>": 57533,
36
  "<s_tips>": 57543,
37
+ "<s_total>": 57541,
38
  "<sep/>": 57522
39
  }
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "naver-clova-ix/donut-base",
3
  "architectures": [
4
  "VisionEncoderDecoderModel"
5
  ],
@@ -85,9 +85,9 @@
85
  "typical_p": 1.0,
86
  "use_bfloat16": false,
87
  "use_cache": true,
88
- "vocab_size": 57556
89
  },
90
- "decoder_start_token_id": 57555,
91
  "encoder": {
92
  "_name_or_path": "",
93
  "add_cross_attention": false,
@@ -124,8 +124,8 @@
124
  "1": "LABEL_1"
125
  },
126
  "image_size": [
127
- 1280,
128
- 960
129
  ],
130
  "initializer_range": 0.02,
131
  "is_decoder": false,
@@ -187,6 +187,5 @@
187
  "model_type": "vision-encoder-decoder",
188
  "pad_token_id": 1,
189
  "tie_word_embeddings": false,
190
- "torch_dtype": "float32",
191
  "transformers_version": "4.36.2"
192
  }
 
1
  {
2
+ "_name_or_path": "C:\\Users\\adam\\Documents\\Code\\LLMs\\Trained_Models\\Donut-base\\v2",
3
  "architectures": [
4
  "VisionEncoderDecoderModel"
5
  ],
 
85
  "typical_p": 1.0,
86
  "use_bfloat16": false,
87
  "use_cache": true,
88
+ "vocab_size": 57559
89
  },
90
+ "decoder_start_token_id": 57557,
91
  "encoder": {
92
  "_name_or_path": "",
93
  "add_cross_attention": false,
 
124
  "1": "LABEL_1"
125
  },
126
  "image_size": [
127
+ 1798,
128
+ 1348
129
  ],
130
  "initializer_range": 0.02,
131
  "is_decoder": false,
 
187
  "model_type": "vision-encoder-decoder",
188
  "pad_token_id": 1,
189
  "tie_word_embeddings": false,
 
190
  "transformers_version": "4.36.2"
191
  }
generation_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "_from_model_config": true,
3
  "bos_token_id": 0,
4
- "decoder_start_token_id": 57555,
5
  "eos_token_id": 2,
6
  "forced_eos_token_id": 2,
7
  "max_length": 768,
 
1
  {
2
  "_from_model_config": true,
3
  "bos_token_id": 0,
4
+ "decoder_start_token_id": 57557,
5
  "eos_token_id": 2,
6
  "forced_eos_token_id": 2,
7
  "max_length": 768,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:403f1abb6b63ce7e6002b47faf53a536b25d31e394b435e7f305dda2fa219b2a
3
- size 809197720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9488f552c39187659857b11e3e83b45a615e037d0c57f37f77d528a1682099cc
3
+ size 809210008
preprocessor_config.json CHANGED
@@ -19,8 +19,8 @@
19
  "processor_class": "DonutProcessor",
20
  "resample": 2,
21
  "rescale_factor": 0.00392156862745098,
22
- "size": [
23
- 960,
24
- 1280
25
- ]
26
- }
 
19
  "processor_class": "DonutProcessor",
20
  "resample": 2,
21
  "rescale_factor": 0.00392156862745098,
22
+ "size": {
23
+ "height": 1798,
24
+ "width": 1348
25
+ }
26
+ }
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -97,7 +97,7 @@
97
  "special": false
98
  },
99
  "57529": {
100
- "content": "<s_telephone>",
101
  "lstrip": false,
102
  "normalized": true,
103
  "rstrip": false,
@@ -105,7 +105,7 @@
105
  "special": false
106
  },
107
  "57530": {
108
- "content": "</s_telephone>",
109
  "lstrip": false,
110
  "normalized": true,
111
  "rstrip": false,
@@ -161,7 +161,7 @@
161
  "special": false
162
  },
163
  "57537": {
164
- "content": "<s_tax>",
165
  "lstrip": false,
166
  "normalized": true,
167
  "rstrip": false,
@@ -169,7 +169,7 @@
169
  "special": false
170
  },
171
  "57538": {
172
- "content": "</s_tax>",
173
  "lstrip": false,
174
  "normalized": true,
175
  "rstrip": false,
@@ -177,7 +177,7 @@
177
  "special": false
178
  },
179
  "57539": {
180
- "content": "<s_total>",
181
  "lstrip": false,
182
  "normalized": true,
183
  "rstrip": false,
@@ -185,7 +185,7 @@
185
  "special": false
186
  },
187
  "57540": {
188
- "content": "</s_total>",
189
  "lstrip": false,
190
  "normalized": true,
191
  "rstrip": false,
@@ -193,7 +193,7 @@
193
  "special": false
194
  },
195
  "57541": {
196
- "content": "<s_ignore>",
197
  "lstrip": false,
198
  "normalized": true,
199
  "rstrip": false,
@@ -201,7 +201,7 @@
201
  "special": false
202
  },
203
  "57542": {
204
- "content": "</s_ignore>",
205
  "lstrip": false,
206
  "normalized": true,
207
  "rstrip": false,
@@ -225,7 +225,7 @@
225
  "special": false
226
  },
227
  "57545": {
228
- "content": "<s_line_items>",
229
  "lstrip": false,
230
  "normalized": true,
231
  "rstrip": false,
@@ -233,7 +233,7 @@
233
  "special": false
234
  },
235
  "57546": {
236
- "content": "</s_line_items>",
237
  "lstrip": false,
238
  "normalized": true,
239
  "rstrip": false,
@@ -241,7 +241,7 @@
241
  "special": false
242
  },
243
  "57547": {
244
- "content": "<s_item_key>",
245
  "lstrip": false,
246
  "normalized": true,
247
  "rstrip": false,
@@ -249,7 +249,7 @@
249
  "special": false
250
  },
251
  "57548": {
252
- "content": "</s_item_key>",
253
  "lstrip": false,
254
  "normalized": true,
255
  "rstrip": false,
@@ -257,7 +257,7 @@
257
  "special": false
258
  },
259
  "57549": {
260
- "content": "<s_item_name>",
261
  "lstrip": false,
262
  "normalized": true,
263
  "rstrip": false,
@@ -265,7 +265,7 @@
265
  "special": false
266
  },
267
  "57550": {
268
- "content": "</s_item_name>",
269
  "lstrip": false,
270
  "normalized": true,
271
  "rstrip": false,
@@ -273,7 +273,7 @@
273
  "special": false
274
  },
275
  "57551": {
276
- "content": "<s_item_value>",
277
  "lstrip": false,
278
  "normalized": true,
279
  "rstrip": false,
@@ -281,7 +281,7 @@
281
  "special": false
282
  },
283
  "57552": {
284
- "content": "</s_item_value>",
285
  "lstrip": false,
286
  "normalized": true,
287
  "rstrip": false,
@@ -289,7 +289,7 @@
289
  "special": false
290
  },
291
  "57553": {
292
- "content": "<s_item_quantity>",
293
  "lstrip": false,
294
  "normalized": true,
295
  "rstrip": false,
@@ -297,7 +297,7 @@
297
  "special": false
298
  },
299
  "57554": {
300
- "content": "</s_item_quantity>",
301
  "lstrip": false,
302
  "normalized": true,
303
  "rstrip": false,
@@ -305,7 +305,31 @@
305
  "special": false
306
  },
307
  "57555": {
308
- "content": "<s_cord-v2>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
309
  "lstrip": false,
310
  "normalized": true,
311
  "rstrip": false,
 
97
  "special": false
98
  },
99
  "57529": {
100
+ "content": "<s_phone>",
101
  "lstrip": false,
102
  "normalized": true,
103
  "rstrip": false,
 
105
  "special": false
106
  },
107
  "57530": {
108
+ "content": "</s_phone>",
109
  "lstrip": false,
110
  "normalized": true,
111
  "rstrip": false,
 
161
  "special": false
162
  },
163
  "57537": {
164
+ "content": "<s_svc>",
165
  "lstrip": false,
166
  "normalized": true,
167
  "rstrip": false,
 
169
  "special": false
170
  },
171
  "57538": {
172
+ "content": "</s_svc>",
173
  "lstrip": false,
174
  "normalized": true,
175
  "rstrip": false,
 
177
  "special": false
178
  },
179
  "57539": {
180
+ "content": "<s_tax>",
181
  "lstrip": false,
182
  "normalized": true,
183
  "rstrip": false,
 
185
  "special": false
186
  },
187
  "57540": {
188
+ "content": "</s_tax>",
189
  "lstrip": false,
190
  "normalized": true,
191
  "rstrip": false,
 
193
  "special": false
194
  },
195
  "57541": {
196
+ "content": "<s_total>",
197
  "lstrip": false,
198
  "normalized": true,
199
  "rstrip": false,
 
201
  "special": false
202
  },
203
  "57542": {
204
+ "content": "</s_total>",
205
  "lstrip": false,
206
  "normalized": true,
207
  "rstrip": false,
 
225
  "special": false
226
  },
227
  "57545": {
228
+ "content": "<s_discount>",
229
  "lstrip": false,
230
  "normalized": true,
231
  "rstrip": false,
 
233
  "special": false
234
  },
235
  "57546": {
236
+ "content": "</s_discount>",
237
  "lstrip": false,
238
  "normalized": true,
239
  "rstrip": false,
 
241
  "special": false
242
  },
243
  "57547": {
244
+ "content": "<s_line_items>",
245
  "lstrip": false,
246
  "normalized": true,
247
  "rstrip": false,
 
249
  "special": false
250
  },
251
  "57548": {
252
+ "content": "</s_line_items>",
253
  "lstrip": false,
254
  "normalized": true,
255
  "rstrip": false,
 
257
  "special": false
258
  },
259
  "57549": {
260
+ "content": "<s_item_key>",
261
  "lstrip": false,
262
  "normalized": true,
263
  "rstrip": false,
 
265
  "special": false
266
  },
267
  "57550": {
268
+ "content": "</s_item_key>",
269
  "lstrip": false,
270
  "normalized": true,
271
  "rstrip": false,
 
273
  "special": false
274
  },
275
  "57551": {
276
+ "content": "<s_item_name>",
277
  "lstrip": false,
278
  "normalized": true,
279
  "rstrip": false,
 
281
  "special": false
282
  },
283
  "57552": {
284
+ "content": "</s_item_name>",
285
  "lstrip": false,
286
  "normalized": true,
287
  "rstrip": false,
 
289
  "special": false
290
  },
291
  "57553": {
292
+ "content": "<s_item_value>",
293
  "lstrip": false,
294
  "normalized": true,
295
  "rstrip": false,
 
297
  "special": false
298
  },
299
  "57554": {
300
+ "content": "</s_item_value>",
301
  "lstrip": false,
302
  "normalized": true,
303
  "rstrip": false,
 
305
  "special": false
306
  },
307
  "57555": {
308
+ "content": "<s_item_quantity>",
309
+ "lstrip": false,
310
+ "normalized": true,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": false
314
+ },
315
+ "57556": {
316
+ "content": "</s_item_quantity>",
317
+ "lstrip": false,
318
+ "normalized": true,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": false
322
+ },
323
+ "57557": {
324
+ "content": "<s_receipt>",
325
+ "lstrip": false,
326
+ "normalized": true,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": false
330
+ },
331
+ "57558": {
332
+ "content": "</s_receipt>",
333
  "lstrip": false,
334
  "normalized": true,
335
  "rstrip": false,