grimu committed on
Commit
f1a8fe1
1 Parent(s): 107238c

Upload processor

Browse files
added_tokens.json CHANGED
@@ -1,31 +1,50 @@
1
  {
2
- "</s_Cooking>": 57548,
3
- "</s_Fish>": 57530,
4
- "</s_Gear>": 57544,
5
- "</s_Latin>": 57532,
6
- "</s_Packing>": 57542,
7
- "</s_Peche>": 57528,
8
- "</s_Provenance>": 57550,
9
- "</s_Size>": 57546,
10
- "</s_Szone>": 57540,
11
- "</s_SzoneCode>": 57536,
12
- "</s_Zone>": 57538,
13
- "</s_ZoneCode>": 57534,
14
- "</s_ner>": 57526,
15
- "<s_Cooking>": 57547,
16
- "<s_Fish>": 57529,
17
- "<s_Gear>": 57543,
18
- "<s_Latin>": 57531,
19
- "<s_Packing>": 57541,
20
- "<s_Peche>": 57527,
21
- "<s_Provenance>": 57549,
22
- "<s_Size>": 57545,
23
- "<s_Szone>": 57539,
24
- "<s_SzoneCode>": 57535,
25
- "<s_Zone>": 57537,
26
- "<s_ZoneCode>": 57533,
27
- "<s_iitcdip>": 57523,
28
- "<s_ner>": 57525,
29
- "<s_synthdog>": 57524,
30
- "<sep/>": 57522
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  }
 
1
  {
2
+ "</s_Cooking>": 57567,
3
+ "</s_Fish>": 57549,
4
+ "</s_Gear>": 57563,
5
+ "</s_Latin>": 57551,
6
+ "</s_Packing>": 57561,
7
+ "</s_Peche>": 57547,
8
+ "</s_Provenance>": 57569,
9
+ "</s_Size>": 57565,
10
+ "</s_Szone>": 57559,
11
+ "</s_SzoneCode>": 57555,
12
+ "</s_Zone>": 57557,
13
+ "</s_ZoneCode>": 57553,
14
+ "</s_answer>": 57523,
15
+ "</s_ner>": 57545,
16
+ "</s_question>": 57524,
17
+ "<Fish/>": 57533,
18
+ "<Gear/>": 57539,
19
+ "<Latin/>": 57538,
20
+ "<O/>": 57532,
21
+ "<Packing/>": 57536,
22
+ "<Peche/>": 57534,
23
+ "<SZone/>": 57542,
24
+ "<Size/>": 57537,
25
+ "<SzoneCode/>": 57543,
26
+ "<Zone/>": 57540,
27
+ "<ZoneCode/>": 57541,
28
+ "<no/>": 57525,
29
+ "<s_Cooking>": 57566,
30
+ "<s_Fish>": 57548,
31
+ "<s_Gear>": 57562,
32
+ "<s_Latin>": 57550,
33
+ "<s_Packing>": 57560,
34
+ "<s_Peche>": 57546,
35
+ "<s_Provenance>": 57568,
36
+ "<s_Size>": 57564,
37
+ "<s_Szone>": 57558,
38
+ "<s_SzoneCode>": 57554,
39
+ "<s_Zone>": 57556,
40
+ "<s_ZoneCode>": 57552,
41
+ "<s_answer>": 57526,
42
+ "<s_docvqa>": 57527,
43
+ "<s_iitcdip>": 57528,
44
+ "<s_ner>": 57544,
45
+ "<s_question>": 57529,
46
+ "<s_synthdog>": 57530,
47
+ "<sep/>": 57522,
48
+ "<yes/>": 57531,
49
+ "<Cooking/>": 57535
50
  }
special_tokens_map.json CHANGED
@@ -1,7 +1,14 @@
1
  {
2
  "additional_special_tokens": [
 
 
 
 
 
3
  "<s_iitcdip>",
4
- "<s_synthdog>"
 
 
5
  ],
6
  "bos_token": "<s>",
7
  "cls_token": "<s>",
 
1
  {
2
  "additional_special_tokens": [
3
+ "</s_answer>",
4
+ "</s_question>",
5
+ "<no/>",
6
+ "<s_answer>",
7
+ "<s_docvqa>",
8
  "<s_iitcdip>",
9
+ "<s_question>",
10
+ "<s_synthdog>",
11
+ "<yes/>"
12
  ],
13
  "bos_token": "<s>",
14
  "cls_token": "<s>",
tokenizer.json CHANGED
@@ -2,13 +2,13 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 256,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
9
  "padding": {
10
  "strategy": {
11
- "Fixed": 256
12
  },
13
  "direction": "Right",
14
  "pad_to_multiple_of": null,
@@ -73,7 +73,7 @@
73
  },
74
  {
75
  "id": 57523,
76
- "content": "<s_iitcdip>",
77
  "single_word": false,
78
  "lstrip": false,
79
  "rstrip": false,
@@ -82,7 +82,7 @@
82
  },
83
  {
84
  "id": 57524,
85
- "content": "<s_synthdog>",
86
  "single_word": false,
87
  "lstrip": false,
88
  "rstrip": false,
@@ -91,6 +91,177 @@
91
  },
92
  {
93
  "id": 57525,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  "content": "<s_ner>",
95
  "single_word": false,
96
  "lstrip": false,
@@ -99,7 +270,7 @@
99
  "special": false
100
  },
101
  {
102
- "id": 57526,
103
  "content": "</s_ner>",
104
  "single_word": false,
105
  "lstrip": false,
@@ -108,7 +279,7 @@
108
  "special": false
109
  },
110
  {
111
- "id": 57527,
112
  "content": "<s_Peche>",
113
  "single_word": false,
114
  "lstrip": false,
@@ -117,7 +288,7 @@
117
  "special": false
118
  },
119
  {
120
- "id": 57528,
121
  "content": "</s_Peche>",
122
  "single_word": false,
123
  "lstrip": false,
@@ -126,7 +297,7 @@
126
  "special": false
127
  },
128
  {
129
- "id": 57529,
130
  "content": "<s_Fish>",
131
  "single_word": false,
132
  "lstrip": false,
@@ -135,7 +306,7 @@
135
  "special": false
136
  },
137
  {
138
- "id": 57530,
139
  "content": "</s_Fish>",
140
  "single_word": false,
141
  "lstrip": false,
@@ -144,7 +315,7 @@
144
  "special": false
145
  },
146
  {
147
- "id": 57531,
148
  "content": "<s_Latin>",
149
  "single_word": false,
150
  "lstrip": false,
@@ -153,7 +324,7 @@
153
  "special": false
154
  },
155
  {
156
- "id": 57532,
157
  "content": "</s_Latin>",
158
  "single_word": false,
159
  "lstrip": false,
@@ -162,7 +333,7 @@
162
  "special": false
163
  },
164
  {
165
- "id": 57533,
166
  "content": "<s_ZoneCode>",
167
  "single_word": false,
168
  "lstrip": false,
@@ -171,7 +342,7 @@
171
  "special": false
172
  },
173
  {
174
- "id": 57534,
175
  "content": "</s_ZoneCode>",
176
  "single_word": false,
177
  "lstrip": false,
@@ -180,7 +351,7 @@
180
  "special": false
181
  },
182
  {
183
- "id": 57535,
184
  "content": "<s_SzoneCode>",
185
  "single_word": false,
186
  "lstrip": false,
@@ -189,7 +360,7 @@
189
  "special": false
190
  },
191
  {
192
- "id": 57536,
193
  "content": "</s_SzoneCode>",
194
  "single_word": false,
195
  "lstrip": false,
@@ -198,7 +369,7 @@
198
  "special": false
199
  },
200
  {
201
- "id": 57537,
202
  "content": "<s_Zone>",
203
  "single_word": false,
204
  "lstrip": false,
@@ -207,7 +378,7 @@
207
  "special": false
208
  },
209
  {
210
- "id": 57538,
211
  "content": "</s_Zone>",
212
  "single_word": false,
213
  "lstrip": false,
@@ -216,7 +387,7 @@
216
  "special": false
217
  },
218
  {
219
- "id": 57539,
220
  "content": "<s_Szone>",
221
  "single_word": false,
222
  "lstrip": false,
@@ -225,7 +396,7 @@
225
  "special": false
226
  },
227
  {
228
- "id": 57540,
229
  "content": "</s_Szone>",
230
  "single_word": false,
231
  "lstrip": false,
@@ -234,7 +405,7 @@
234
  "special": false
235
  },
236
  {
237
- "id": 57541,
238
  "content": "<s_Packing>",
239
  "single_word": false,
240
  "lstrip": false,
@@ -243,7 +414,7 @@
243
  "special": false
244
  },
245
  {
246
- "id": 57542,
247
  "content": "</s_Packing>",
248
  "single_word": false,
249
  "lstrip": false,
@@ -252,7 +423,7 @@
252
  "special": false
253
  },
254
  {
255
- "id": 57543,
256
  "content": "<s_Gear>",
257
  "single_word": false,
258
  "lstrip": false,
@@ -261,7 +432,7 @@
261
  "special": false
262
  },
263
  {
264
- "id": 57544,
265
  "content": "</s_Gear>",
266
  "single_word": false,
267
  "lstrip": false,
@@ -270,7 +441,7 @@
270
  "special": false
271
  },
272
  {
273
- "id": 57545,
274
  "content": "<s_Size>",
275
  "single_word": false,
276
  "lstrip": false,
@@ -279,7 +450,7 @@
279
  "special": false
280
  },
281
  {
282
- "id": 57546,
283
  "content": "</s_Size>",
284
  "single_word": false,
285
  "lstrip": false,
@@ -288,7 +459,7 @@
288
  "special": false
289
  },
290
  {
291
- "id": 57547,
292
  "content": "<s_Cooking>",
293
  "single_word": false,
294
  "lstrip": false,
@@ -297,7 +468,7 @@
297
  "special": false
298
  },
299
  {
300
- "id": 57548,
301
  "content": "</s_Cooking>",
302
  "single_word": false,
303
  "lstrip": false,
@@ -306,7 +477,7 @@
306
  "special": false
307
  },
308
  {
309
- "id": 57549,
310
  "content": "<s_Provenance>",
311
  "single_word": false,
312
  "lstrip": false,
@@ -315,7 +486,7 @@
315
  "special": false
316
  },
317
  {
318
- "id": 57550,
319
  "content": "</s_Provenance>",
320
  "single_word": false,
321
  "lstrip": false,
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 128,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
9
  "padding": {
10
  "strategy": {
11
+ "Fixed": 128
12
  },
13
  "direction": "Right",
14
  "pad_to_multiple_of": null,
 
73
  },
74
  {
75
  "id": 57523,
76
+ "content": "</s_answer>",
77
  "single_word": false,
78
  "lstrip": false,
79
  "rstrip": false,
 
82
  },
83
  {
84
  "id": 57524,
85
+ "content": "</s_question>",
86
  "single_word": false,
87
  "lstrip": false,
88
  "rstrip": false,
 
91
  },
92
  {
93
  "id": 57525,
94
+ "content": "<no/>",
95
+ "single_word": false,
96
+ "lstrip": false,
97
+ "rstrip": false,
98
+ "normalized": false,
99
+ "special": true
100
+ },
101
+ {
102
+ "id": 57526,
103
+ "content": "<s_answer>",
104
+ "single_word": false,
105
+ "lstrip": false,
106
+ "rstrip": false,
107
+ "normalized": false,
108
+ "special": true
109
+ },
110
+ {
111
+ "id": 57527,
112
+ "content": "<s_docvqa>",
113
+ "single_word": false,
114
+ "lstrip": false,
115
+ "rstrip": false,
116
+ "normalized": false,
117
+ "special": true
118
+ },
119
+ {
120
+ "id": 57528,
121
+ "content": "<s_iitcdip>",
122
+ "single_word": false,
123
+ "lstrip": false,
124
+ "rstrip": false,
125
+ "normalized": false,
126
+ "special": true
127
+ },
128
+ {
129
+ "id": 57529,
130
+ "content": "<s_question>",
131
+ "single_word": false,
132
+ "lstrip": false,
133
+ "rstrip": false,
134
+ "normalized": false,
135
+ "special": true
136
+ },
137
+ {
138
+ "id": 57530,
139
+ "content": "<s_synthdog>",
140
+ "single_word": false,
141
+ "lstrip": false,
142
+ "rstrip": false,
143
+ "normalized": false,
144
+ "special": true
145
+ },
146
+ {
147
+ "id": 57531,
148
+ "content": "<yes/>",
149
+ "single_word": false,
150
+ "lstrip": false,
151
+ "rstrip": false,
152
+ "normalized": false,
153
+ "special": true
154
+ },
155
+ {
156
+ "id": 57532,
157
+ "content": "<O/>",
158
+ "single_word": false,
159
+ "lstrip": false,
160
+ "rstrip": false,
161
+ "normalized": true,
162
+ "special": false
163
+ },
164
+ {
165
+ "id": 57533,
166
+ "content": "<Fish/>",
167
+ "single_word": false,
168
+ "lstrip": false,
169
+ "rstrip": false,
170
+ "normalized": true,
171
+ "special": false
172
+ },
173
+ {
174
+ "id": 57534,
175
+ "content": "<Peche/>",
176
+ "single_word": false,
177
+ "lstrip": false,
178
+ "rstrip": false,
179
+ "normalized": true,
180
+ "special": false
181
+ },
182
+ {
183
+ "id": 57535,
184
+ "content": "<Cooking/>",
185
+ "single_word": false,
186
+ "lstrip": false,
187
+ "rstrip": false,
188
+ "normalized": true,
189
+ "special": false
190
+ },
191
+ {
192
+ "id": 57536,
193
+ "content": "<Packing/>",
194
+ "single_word": false,
195
+ "lstrip": false,
196
+ "rstrip": false,
197
+ "normalized": true,
198
+ "special": false
199
+ },
200
+ {
201
+ "id": 57537,
202
+ "content": "<Size/>",
203
+ "single_word": false,
204
+ "lstrip": false,
205
+ "rstrip": false,
206
+ "normalized": true,
207
+ "special": false
208
+ },
209
+ {
210
+ "id": 57538,
211
+ "content": "<Latin/>",
212
+ "single_word": false,
213
+ "lstrip": false,
214
+ "rstrip": false,
215
+ "normalized": true,
216
+ "special": false
217
+ },
218
+ {
219
+ "id": 57539,
220
+ "content": "<Gear/>",
221
+ "single_word": false,
222
+ "lstrip": false,
223
+ "rstrip": false,
224
+ "normalized": true,
225
+ "special": false
226
+ },
227
+ {
228
+ "id": 57540,
229
+ "content": "<Zone/>",
230
+ "single_word": false,
231
+ "lstrip": false,
232
+ "rstrip": false,
233
+ "normalized": true,
234
+ "special": false
235
+ },
236
+ {
237
+ "id": 57541,
238
+ "content": "<ZoneCode/>",
239
+ "single_word": false,
240
+ "lstrip": false,
241
+ "rstrip": false,
242
+ "normalized": true,
243
+ "special": false
244
+ },
245
+ {
246
+ "id": 57542,
247
+ "content": "<SZone/>",
248
+ "single_word": false,
249
+ "lstrip": false,
250
+ "rstrip": false,
251
+ "normalized": true,
252
+ "special": false
253
+ },
254
+ {
255
+ "id": 57543,
256
+ "content": "<SzoneCode/>",
257
+ "single_word": false,
258
+ "lstrip": false,
259
+ "rstrip": false,
260
+ "normalized": true,
261
+ "special": false
262
+ },
263
+ {
264
+ "id": 57544,
265
  "content": "<s_ner>",
266
  "single_word": false,
267
  "lstrip": false,
 
270
  "special": false
271
  },
272
  {
273
+ "id": 57545,
274
  "content": "</s_ner>",
275
  "single_word": false,
276
  "lstrip": false,
 
279
  "special": false
280
  },
281
  {
282
+ "id": 57546,
283
  "content": "<s_Peche>",
284
  "single_word": false,
285
  "lstrip": false,
 
288
  "special": false
289
  },
290
  {
291
+ "id": 57547,
292
  "content": "</s_Peche>",
293
  "single_word": false,
294
  "lstrip": false,
 
297
  "special": false
298
  },
299
  {
300
+ "id": 57548,
301
  "content": "<s_Fish>",
302
  "single_word": false,
303
  "lstrip": false,
 
306
  "special": false
307
  },
308
  {
309
+ "id": 57549,
310
  "content": "</s_Fish>",
311
  "single_word": false,
312
  "lstrip": false,
 
315
  "special": false
316
  },
317
  {
318
+ "id": 57550,
319
  "content": "<s_Latin>",
320
  "single_word": false,
321
  "lstrip": false,
 
324
  "special": false
325
  },
326
  {
327
+ "id": 57551,
328
  "content": "</s_Latin>",
329
  "single_word": false,
330
  "lstrip": false,
 
333
  "special": false
334
  },
335
  {
336
+ "id": 57552,
337
  "content": "<s_ZoneCode>",
338
  "single_word": false,
339
  "lstrip": false,
 
342
  "special": false
343
  },
344
  {
345
+ "id": 57553,
346
  "content": "</s_ZoneCode>",
347
  "single_word": false,
348
  "lstrip": false,
 
351
  "special": false
352
  },
353
  {
354
+ "id": 57554,
355
  "content": "<s_SzoneCode>",
356
  "single_word": false,
357
  "lstrip": false,
 
360
  "special": false
361
  },
362
  {
363
+ "id": 57555,
364
  "content": "</s_SzoneCode>",
365
  "single_word": false,
366
  "lstrip": false,
 
369
  "special": false
370
  },
371
  {
372
+ "id": 57556,
373
  "content": "<s_Zone>",
374
  "single_word": false,
375
  "lstrip": false,
 
378
  "special": false
379
  },
380
  {
381
+ "id": 57557,
382
  "content": "</s_Zone>",
383
  "single_word": false,
384
  "lstrip": false,
 
387
  "special": false
388
  },
389
  {
390
+ "id": 57558,
391
  "content": "<s_Szone>",
392
  "single_word": false,
393
  "lstrip": false,
 
396
  "special": false
397
  },
398
  {
399
+ "id": 57559,
400
  "content": "</s_Szone>",
401
  "single_word": false,
402
  "lstrip": false,
 
405
  "special": false
406
  },
407
  {
408
+ "id": 57560,
409
  "content": "<s_Packing>",
410
  "single_word": false,
411
  "lstrip": false,
 
414
  "special": false
415
  },
416
  {
417
+ "id": 57561,
418
  "content": "</s_Packing>",
419
  "single_word": false,
420
  "lstrip": false,
 
423
  "special": false
424
  },
425
  {
426
+ "id": 57562,
427
  "content": "<s_Gear>",
428
  "single_word": false,
429
  "lstrip": false,
 
432
  "special": false
433
  },
434
  {
435
+ "id": 57563,
436
  "content": "</s_Gear>",
437
  "single_word": false,
438
  "lstrip": false,
 
441
  "special": false
442
  },
443
  {
444
+ "id": 57564,
445
  "content": "<s_Size>",
446
  "single_word": false,
447
  "lstrip": false,
 
450
  "special": false
451
  },
452
  {
453
+ "id": 57565,
454
  "content": "</s_Size>",
455
  "single_word": false,
456
  "lstrip": false,
 
459
  "special": false
460
  },
461
  {
462
+ "id": 57566,
463
  "content": "<s_Cooking>",
464
  "single_word": false,
465
  "lstrip": false,
 
468
  "special": false
469
  },
470
  {
471
+ "id": 57567,
472
  "content": "</s_Cooking>",
473
  "single_word": false,
474
  "lstrip": false,
 
477
  "special": false
478
  },
479
  {
480
+ "id": 57568,
481
  "content": "<s_Provenance>",
482
  "single_word": false,
483
  "lstrip": false,
 
486
  "special": false
487
  },
488
  {
489
+ "id": 57569,
490
  "content": "</s_Provenance>",
491
  "single_word": false,
492
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -11,7 +11,7 @@
11
  "single_word": false
12
  },
13
  "model_max_length": 1000000000000000019884624838656,
14
- "name_or_path": "naver-clova-ix/donut-base",
15
  "pad_token": "<pad>",
16
  "processor_class": "DonutProcessor",
17
  "sep_token": "</s>",
 
11
  "single_word": false
12
  },
13
  "model_max_length": 1000000000000000019884624838656,
14
+ "name_or_path": "naver-clova-ix/donut-base-finetuned-docvqa",
15
  "pad_token": "<pad>",
16
  "processor_class": "DonutProcessor",
17
  "sep_token": "</s>",