mychen76 committed on
Commit
88a68a2
1 Parent(s): abecec5

Training started

Browse files
added_tokens.json CHANGED
@@ -1,39 +1,7 @@
1
  {
2
- "</s_date>": 57558,
3
- "</s_ignore>": 57556,
4
- "</s_item_key>": 57554,
5
- "</s_item_name>": 57552,
6
- "</s_item_quantity>": 57550,
7
- "</s_item_value>": 57548,
8
- "</s_line_items>": 57546,
9
- "</s_others>": 57542,
10
- "</s_store_addr>": 57540,
11
- "</s_store_name>": 57538,
12
- "</s_subtotal>": 57536,
13
- "</s_tax>": 57534,
14
- "</s_telephone>": 57532,
15
- "</s_text>": 57544,
16
- "</s_time>": 57530,
17
- "</s_tips>": 57528,
18
- "</s_total>": 57526,
19
- "<s_date>": 57557,
20
- "<s_ignore>": 57555,
21
  "<s_iitcdip>": 57523,
22
- "<s_item_key>": 57553,
23
- "<s_item_name>": 57551,
24
- "<s_item_quantity>": 57549,
25
- "<s_item_value>": 57547,
26
- "<s_line_items>": 57545,
27
- "<s_others>": 57541,
28
- "<s_store_addr>": 57539,
29
- "<s_store_name>": 57537,
30
- "<s_subtotal>": 57535,
31
  "<s_synthdog>": 57524,
32
- "<s_tax>": 57533,
33
- "<s_telephone>": 57531,
34
- "<s_text>": 57543,
35
- "<s_time>": 57529,
36
- "<s_tips>": 57527,
37
- "<s_total>": 57525,
38
  "<sep/>": 57522
39
  }
 
1
  {
2
+ "</s_receipt>": 57526,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  "<s_iitcdip>": 57523,
4
+ "<s_receipt>": 57525,
 
 
 
 
 
 
 
 
5
  "<s_synthdog>": 57524,
 
 
 
 
 
 
6
  "<sep/>": 57522
7
  }
preprocessor_config.json CHANGED
@@ -20,7 +20,7 @@
20
  "resample": 2,
21
  "rescale_factor": 0.00392156862745098,
22
  "size": [
23
- 960,
24
- 720
25
  ]
26
  }
 
20
  "resample": 2,
21
  "rescale_factor": 0.00392156862745098,
22
  "size": [
23
+ 1280,
24
+ 1920
25
  ]
26
  }
special_tokens_map.json CHANGED
@@ -1,41 +1,7 @@
1
  {
2
  "additional_special_tokens": [
3
- "<s_total>",
4
- "</s_total>",
5
- "<s_tips>",
6
- "</s_tips>",
7
- "<s_time>",
8
- "</s_time>",
9
- "<s_telephone>",
10
- "</s_telephone>",
11
- "<s_tax>",
12
- "</s_tax>",
13
- "<s_subtotal>",
14
- "</s_subtotal>",
15
- "<s_store_name>",
16
- "</s_store_name>",
17
- "<s_store_addr>",
18
- "</s_store_addr>",
19
- "<s_others>",
20
- "</s_others>",
21
- "<s_text>",
22
- "</s_text>",
23
- "<s_line_items>",
24
- "</s_line_items>",
25
- "<s_item_value>",
26
- "</s_item_value>",
27
- "<s_item_quantity>",
28
- "</s_item_quantity>",
29
- "<s_item_name>",
30
- "</s_item_name>",
31
- "<s_item_key>",
32
- "</s_item_key>",
33
- "<s_ignore>",
34
- "</s_ignore>",
35
- "<s_date>",
36
- "</s_date>",
37
- "<s>",
38
- "</s>"
39
  ],
40
  "bos_token": "<s>",
41
  "cls_token": "<s>",
 
1
  {
2
  "additional_special_tokens": [
3
+ "<s_iitcdip>",
4
+ "<s_synthdog>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  ],
6
  "bos_token": "<s>",
7
  "cls_token": "<s>",
tokenizer.json CHANGED
@@ -1,21 +1,7 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 512,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
- },
9
- "padding": {
10
- "strategy": {
11
- "Fixed": 512
12
- },
13
- "direction": "Right",
14
- "pad_to_multiple_of": null,
15
- "pad_id": 1,
16
- "pad_type_id": 0,
17
- "pad_token": "<pad>"
18
- },
19
  "added_tokens": [
20
  {
21
  "id": 0,
@@ -91,309 +77,21 @@
91
  },
92
  {
93
  "id": 57525,
94
- "content": "<s_total>",
95
  "single_word": false,
96
  "lstrip": false,
97
  "rstrip": false,
98
- "normalized": false,
99
- "special": true
100
  },
101
  {
102
  "id": 57526,
103
- "content": "</s_total>",
104
- "single_word": false,
105
- "lstrip": false,
106
- "rstrip": false,
107
- "normalized": false,
108
- "special": true
109
- },
110
- {
111
- "id": 57527,
112
- "content": "<s_tips>",
113
- "single_word": false,
114
- "lstrip": false,
115
- "rstrip": false,
116
- "normalized": false,
117
- "special": true
118
- },
119
- {
120
- "id": 57528,
121
- "content": "</s_tips>",
122
- "single_word": false,
123
- "lstrip": false,
124
- "rstrip": false,
125
- "normalized": false,
126
- "special": true
127
- },
128
- {
129
- "id": 57529,
130
- "content": "<s_time>",
131
- "single_word": false,
132
- "lstrip": false,
133
- "rstrip": false,
134
- "normalized": false,
135
- "special": true
136
- },
137
- {
138
- "id": 57530,
139
- "content": "</s_time>",
140
- "single_word": false,
141
- "lstrip": false,
142
- "rstrip": false,
143
- "normalized": false,
144
- "special": true
145
- },
146
- {
147
- "id": 57531,
148
- "content": "<s_telephone>",
149
- "single_word": false,
150
- "lstrip": false,
151
- "rstrip": false,
152
- "normalized": false,
153
- "special": true
154
- },
155
- {
156
- "id": 57532,
157
- "content": "</s_telephone>",
158
- "single_word": false,
159
- "lstrip": false,
160
- "rstrip": false,
161
- "normalized": false,
162
- "special": true
163
- },
164
- {
165
- "id": 57533,
166
- "content": "<s_tax>",
167
- "single_word": false,
168
- "lstrip": false,
169
- "rstrip": false,
170
- "normalized": false,
171
- "special": true
172
- },
173
- {
174
- "id": 57534,
175
- "content": "</s_tax>",
176
- "single_word": false,
177
- "lstrip": false,
178
- "rstrip": false,
179
- "normalized": false,
180
- "special": true
181
- },
182
- {
183
- "id": 57535,
184
- "content": "<s_subtotal>",
185
- "single_word": false,
186
- "lstrip": false,
187
- "rstrip": false,
188
- "normalized": false,
189
- "special": true
190
- },
191
- {
192
- "id": 57536,
193
- "content": "</s_subtotal>",
194
  "single_word": false,
195
  "lstrip": false,
196
  "rstrip": false,
197
- "normalized": false,
198
- "special": true
199
- },
200
- {
201
- "id": 57537,
202
- "content": "<s_store_name>",
203
- "single_word": false,
204
- "lstrip": false,
205
- "rstrip": false,
206
- "normalized": false,
207
- "special": true
208
- },
209
- {
210
- "id": 57538,
211
- "content": "</s_store_name>",
212
- "single_word": false,
213
- "lstrip": false,
214
- "rstrip": false,
215
- "normalized": false,
216
- "special": true
217
- },
218
- {
219
- "id": 57539,
220
- "content": "<s_store_addr>",
221
- "single_word": false,
222
- "lstrip": false,
223
- "rstrip": false,
224
- "normalized": false,
225
- "special": true
226
- },
227
- {
228
- "id": 57540,
229
- "content": "</s_store_addr>",
230
- "single_word": false,
231
- "lstrip": false,
232
- "rstrip": false,
233
- "normalized": false,
234
- "special": true
235
- },
236
- {
237
- "id": 57541,
238
- "content": "<s_others>",
239
- "single_word": false,
240
- "lstrip": false,
241
- "rstrip": false,
242
- "normalized": false,
243
- "special": true
244
- },
245
- {
246
- "id": 57542,
247
- "content": "</s_others>",
248
- "single_word": false,
249
- "lstrip": false,
250
- "rstrip": false,
251
- "normalized": false,
252
- "special": true
253
- },
254
- {
255
- "id": 57543,
256
- "content": "<s_text>",
257
- "single_word": false,
258
- "lstrip": false,
259
- "rstrip": false,
260
- "normalized": false,
261
- "special": true
262
- },
263
- {
264
- "id": 57544,
265
- "content": "</s_text>",
266
- "single_word": false,
267
- "lstrip": false,
268
- "rstrip": false,
269
- "normalized": false,
270
- "special": true
271
- },
272
- {
273
- "id": 57545,
274
- "content": "<s_line_items>",
275
- "single_word": false,
276
- "lstrip": false,
277
- "rstrip": false,
278
- "normalized": false,
279
- "special": true
280
- },
281
- {
282
- "id": 57546,
283
- "content": "</s_line_items>",
284
- "single_word": false,
285
- "lstrip": false,
286
- "rstrip": false,
287
- "normalized": false,
288
- "special": true
289
- },
290
- {
291
- "id": 57547,
292
- "content": "<s_item_value>",
293
- "single_word": false,
294
- "lstrip": false,
295
- "rstrip": false,
296
- "normalized": false,
297
- "special": true
298
- },
299
- {
300
- "id": 57548,
301
- "content": "</s_item_value>",
302
- "single_word": false,
303
- "lstrip": false,
304
- "rstrip": false,
305
- "normalized": false,
306
- "special": true
307
- },
308
- {
309
- "id": 57549,
310
- "content": "<s_item_quantity>",
311
- "single_word": false,
312
- "lstrip": false,
313
- "rstrip": false,
314
- "normalized": false,
315
- "special": true
316
- },
317
- {
318
- "id": 57550,
319
- "content": "</s_item_quantity>",
320
- "single_word": false,
321
- "lstrip": false,
322
- "rstrip": false,
323
- "normalized": false,
324
- "special": true
325
- },
326
- {
327
- "id": 57551,
328
- "content": "<s_item_name>",
329
- "single_word": false,
330
- "lstrip": false,
331
- "rstrip": false,
332
- "normalized": false,
333
- "special": true
334
- },
335
- {
336
- "id": 57552,
337
- "content": "</s_item_name>",
338
- "single_word": false,
339
- "lstrip": false,
340
- "rstrip": false,
341
- "normalized": false,
342
- "special": true
343
- },
344
- {
345
- "id": 57553,
346
- "content": "<s_item_key>",
347
- "single_word": false,
348
- "lstrip": false,
349
- "rstrip": false,
350
- "normalized": false,
351
- "special": true
352
- },
353
- {
354
- "id": 57554,
355
- "content": "</s_item_key>",
356
- "single_word": false,
357
- "lstrip": false,
358
- "rstrip": false,
359
- "normalized": false,
360
- "special": true
361
- },
362
- {
363
- "id": 57555,
364
- "content": "<s_ignore>",
365
- "single_word": false,
366
- "lstrip": false,
367
- "rstrip": false,
368
- "normalized": false,
369
- "special": true
370
- },
371
- {
372
- "id": 57556,
373
- "content": "</s_ignore>",
374
- "single_word": false,
375
- "lstrip": false,
376
- "rstrip": false,
377
- "normalized": false,
378
- "special": true
379
- },
380
- {
381
- "id": 57557,
382
- "content": "<s_date>",
383
- "single_word": false,
384
- "lstrip": false,
385
- "rstrip": false,
386
- "normalized": false,
387
- "special": true
388
- },
389
- {
390
- "id": 57558,
391
- "content": "</s_date>",
392
- "single_word": false,
393
- "lstrip": false,
394
- "rstrip": false,
395
- "normalized": false,
396
- "special": true
397
  }
398
  ],
399
  "normalizer": {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
 
77
  },
78
  {
79
  "id": 57525,
80
+ "content": "<s_receipt>",
81
  "single_word": false,
82
  "lstrip": false,
83
  "rstrip": false,
84
+ "normalized": true,
85
+ "special": false
86
  },
87
  {
88
  "id": 57526,
89
+ "content": "</s_receipt>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  "single_word": false,
91
  "lstrip": false,
92
  "rstrip": false,
93
+ "normalized": true,
94
+ "special": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  }
96
  ],
97
  "normalizer": {