mychen76 committed on
Commit
5b052c1
1 Parent(s): cd240be

Training done

Browse files
Files changed (2) hide show
  1. added_tokens.json +31 -41
  2. tokenizer.json +31 -121
added_tokens.json CHANGED
@@ -1,46 +1,36 @@
1
  {
2
- "</s_None>": 57564,
3
- "</s_client>": 57534,
4
- "</s_client_tax_id>": 57538,
5
- "</s_header>": 57526,
6
- "</s_iban>": 57540,
7
- "</s_invoice_date>": 57530,
8
- "</s_invoice_no>": 57528,
9
- "</s_item_desc>": 57544,
10
- "</s_item_gross_worth>": 57554,
11
- "</s_item_net_price>": 57548,
12
- "</s_item_net_worth>": 57550,
13
- "</s_item_qty>": 57546,
14
- "</s_item_vat>": 57552,
15
- "</s_items>": 57542,
16
- "</s_seller>": 57532,
17
- "</s_seller_tax_id>": 57536,
18
- "</s_summary>": 57556,
19
- "</s_total_gross_worth>": 57562,
20
- "</s_total_net_worth>": 57558,
21
- "</s_total_vat>": 57560,
22
- "<s_None>": 57563,
23
- "<s_client>": 57533,
24
- "<s_client_tax_id>": 57537,
25
- "<s_cord-v2>": 57565,
26
- "<s_header>": 57525,
27
- "<s_iban>": 57539,
28
  "<s_iitcdip>": 57523,
29
- "<s_invoice_date>": 57529,
30
- "<s_invoice_no>": 57527,
31
- "<s_item_desc>": 57543,
32
- "<s_item_gross_worth>": 57553,
33
- "<s_item_net_price>": 57547,
34
- "<s_item_net_worth>": 57549,
35
- "<s_item_qty>": 57545,
36
- "<s_item_vat>": 57551,
37
- "<s_items>": 57541,
38
- "<s_seller>": 57531,
39
- "<s_seller_tax_id>": 57535,
40
- "<s_summary>": 57555,
41
  "<s_synthdog>": 57524,
42
- "<s_total_gross_worth>": 57561,
43
- "<s_total_net_worth>": 57557,
44
- "<s_total_vat>": 57559,
 
 
45
  "<sep/>": 57522
46
  }
 
1
  {
2
+ "</s_date>": 57533,
3
+ "</s_ignore>": 57543,
4
+ "</s_item_key>": 57549,
5
+ "</s_item_name>": 57551,
6
+ "</s_item_quantity>": 57555,
7
+ "</s_item_value>": 57553,
8
+ "</s_line_items>": 57547,
9
+ "</s_store_addr>": 57529,
10
+ "</s_store_name>": 57527,
11
+ "</s_subtotal>": 57537,
12
+ "</s_tax>": 57539,
13
+ "</s_telephone>": 57531,
14
+ "</s_time>": 57535,
15
+ "</s_tips>": 57545,
16
+ "</s_total>": 57541,
17
+ "<s_date>": 57532,
18
+ "<s_ignore>": 57542,
 
 
 
 
 
 
 
 
 
19
  "<s_iitcdip>": 57523,
20
+ "<s_item_key>": 57548,
21
+ "<s_item_name>": 57550,
22
+ "<s_item_quantity>": 57554,
23
+ "<s_item_value>": 57552,
24
+ "<s_line_items>": 57546,
25
+ "<s_receipt>": 57525,
26
+ "<s_store_addr>": 57528,
27
+ "<s_store_name>": 57526,
28
+ "<s_subtotal>": 57536,
 
 
 
29
  "<s_synthdog>": 57524,
30
+ "<s_tax>": 57538,
31
+ "<s_telephone>": 57530,
32
+ "<s_time>": 57534,
33
+ "<s_tips>": 57544,
34
+ "<s_total>": 57540,
35
  "<sep/>": 57522
36
  }
tokenizer.json CHANGED
@@ -91,7 +91,7 @@
91
  },
92
  {
93
  "id": 57525,
94
- "content": "<s_header>",
95
  "single_word": false,
96
  "lstrip": false,
97
  "rstrip": false,
@@ -100,7 +100,7 @@
100
  },
101
  {
102
  "id": 57526,
103
- "content": "</s_header>",
104
  "single_word": false,
105
  "lstrip": false,
106
  "rstrip": false,
@@ -109,7 +109,7 @@
109
  },
110
  {
111
  "id": 57527,
112
- "content": "<s_invoice_no>",
113
  "single_word": false,
114
  "lstrip": false,
115
  "rstrip": false,
@@ -118,7 +118,7 @@
118
  },
119
  {
120
  "id": 57528,
121
- "content": "</s_invoice_no>",
122
  "single_word": false,
123
  "lstrip": false,
124
  "rstrip": false,
@@ -127,7 +127,7 @@
127
  },
128
  {
129
  "id": 57529,
130
- "content": "<s_invoice_date>",
131
  "single_word": false,
132
  "lstrip": false,
133
  "rstrip": false,
@@ -136,7 +136,7 @@
136
  },
137
  {
138
  "id": 57530,
139
- "content": "</s_invoice_date>",
140
  "single_word": false,
141
  "lstrip": false,
142
  "rstrip": false,
@@ -145,7 +145,7 @@
145
  },
146
  {
147
  "id": 57531,
148
- "content": "<s_seller>",
149
  "single_word": false,
150
  "lstrip": false,
151
  "rstrip": false,
@@ -154,7 +154,7 @@
154
  },
155
  {
156
  "id": 57532,
157
- "content": "</s_seller>",
158
  "single_word": false,
159
  "lstrip": false,
160
  "rstrip": false,
@@ -163,7 +163,7 @@
163
  },
164
  {
165
  "id": 57533,
166
- "content": "<s_client>",
167
  "single_word": false,
168
  "lstrip": false,
169
  "rstrip": false,
@@ -172,7 +172,7 @@
172
  },
173
  {
174
  "id": 57534,
175
- "content": "</s_client>",
176
  "single_word": false,
177
  "lstrip": false,
178
  "rstrip": false,
@@ -181,7 +181,7 @@
181
  },
182
  {
183
  "id": 57535,
184
- "content": "<s_seller_tax_id>",
185
  "single_word": false,
186
  "lstrip": false,
187
  "rstrip": false,
@@ -190,7 +190,7 @@
190
  },
191
  {
192
  "id": 57536,
193
- "content": "</s_seller_tax_id>",
194
  "single_word": false,
195
  "lstrip": false,
196
  "rstrip": false,
@@ -199,7 +199,7 @@
199
  },
200
  {
201
  "id": 57537,
202
- "content": "<s_client_tax_id>",
203
  "single_word": false,
204
  "lstrip": false,
205
  "rstrip": false,
@@ -208,7 +208,7 @@
208
  },
209
  {
210
  "id": 57538,
211
- "content": "</s_client_tax_id>",
212
  "single_word": false,
213
  "lstrip": false,
214
  "rstrip": false,
@@ -217,7 +217,7 @@
217
  },
218
  {
219
  "id": 57539,
220
- "content": "<s_iban>",
221
  "single_word": false,
222
  "lstrip": false,
223
  "rstrip": false,
@@ -226,7 +226,7 @@
226
  },
227
  {
228
  "id": 57540,
229
- "content": "</s_iban>",
230
  "single_word": false,
231
  "lstrip": false,
232
  "rstrip": false,
@@ -235,7 +235,7 @@
235
  },
236
  {
237
  "id": 57541,
238
- "content": "<s_items>",
239
  "single_word": false,
240
  "lstrip": false,
241
  "rstrip": false,
@@ -244,7 +244,7 @@
244
  },
245
  {
246
  "id": 57542,
247
- "content": "</s_items>",
248
  "single_word": false,
249
  "lstrip": false,
250
  "rstrip": false,
@@ -253,7 +253,7 @@
253
  },
254
  {
255
  "id": 57543,
256
- "content": "<s_item_desc>",
257
  "single_word": false,
258
  "lstrip": false,
259
  "rstrip": false,
@@ -262,7 +262,7 @@
262
  },
263
  {
264
  "id": 57544,
265
- "content": "</s_item_desc>",
266
  "single_word": false,
267
  "lstrip": false,
268
  "rstrip": false,
@@ -271,7 +271,7 @@
271
  },
272
  {
273
  "id": 57545,
274
- "content": "<s_item_qty>",
275
  "single_word": false,
276
  "lstrip": false,
277
  "rstrip": false,
@@ -280,7 +280,7 @@
280
  },
281
  {
282
  "id": 57546,
283
- "content": "</s_item_qty>",
284
  "single_word": false,
285
  "lstrip": false,
286
  "rstrip": false,
@@ -289,7 +289,7 @@
289
  },
290
  {
291
  "id": 57547,
292
- "content": "<s_item_net_price>",
293
  "single_word": false,
294
  "lstrip": false,
295
  "rstrip": false,
@@ -298,7 +298,7 @@
298
  },
299
  {
300
  "id": 57548,
301
- "content": "</s_item_net_price>",
302
  "single_word": false,
303
  "lstrip": false,
304
  "rstrip": false,
@@ -307,7 +307,7 @@
307
  },
308
  {
309
  "id": 57549,
310
- "content": "<s_item_net_worth>",
311
  "single_word": false,
312
  "lstrip": false,
313
  "rstrip": false,
@@ -316,7 +316,7 @@
316
  },
317
  {
318
  "id": 57550,
319
- "content": "</s_item_net_worth>",
320
  "single_word": false,
321
  "lstrip": false,
322
  "rstrip": false,
@@ -325,7 +325,7 @@
325
  },
326
  {
327
  "id": 57551,
328
- "content": "<s_item_vat>",
329
  "single_word": false,
330
  "lstrip": false,
331
  "rstrip": false,
@@ -334,7 +334,7 @@
334
  },
335
  {
336
  "id": 57552,
337
- "content": "</s_item_vat>",
338
  "single_word": false,
339
  "lstrip": false,
340
  "rstrip": false,
@@ -343,7 +343,7 @@
343
  },
344
  {
345
  "id": 57553,
346
- "content": "<s_item_gross_worth>",
347
  "single_word": false,
348
  "lstrip": false,
349
  "rstrip": false,
@@ -352,7 +352,7 @@
352
  },
353
  {
354
  "id": 57554,
355
- "content": "</s_item_gross_worth>",
356
  "single_word": false,
357
  "lstrip": false,
358
  "rstrip": false,
@@ -361,97 +361,7 @@
361
  },
362
  {
363
  "id": 57555,
364
- "content": "<s_summary>",
365
- "single_word": false,
366
- "lstrip": false,
367
- "rstrip": false,
368
- "normalized": true,
369
- "special": false
370
- },
371
- {
372
- "id": 57556,
373
- "content": "</s_summary>",
374
- "single_word": false,
375
- "lstrip": false,
376
- "rstrip": false,
377
- "normalized": true,
378
- "special": false
379
- },
380
- {
381
- "id": 57557,
382
- "content": "<s_total_net_worth>",
383
- "single_word": false,
384
- "lstrip": false,
385
- "rstrip": false,
386
- "normalized": true,
387
- "special": false
388
- },
389
- {
390
- "id": 57558,
391
- "content": "</s_total_net_worth>",
392
- "single_word": false,
393
- "lstrip": false,
394
- "rstrip": false,
395
- "normalized": true,
396
- "special": false
397
- },
398
- {
399
- "id": 57559,
400
- "content": "<s_total_vat>",
401
- "single_word": false,
402
- "lstrip": false,
403
- "rstrip": false,
404
- "normalized": true,
405
- "special": false
406
- },
407
- {
408
- "id": 57560,
409
- "content": "</s_total_vat>",
410
- "single_word": false,
411
- "lstrip": false,
412
- "rstrip": false,
413
- "normalized": true,
414
- "special": false
415
- },
416
- {
417
- "id": 57561,
418
- "content": "<s_total_gross_worth>",
419
- "single_word": false,
420
- "lstrip": false,
421
- "rstrip": false,
422
- "normalized": true,
423
- "special": false
424
- },
425
- {
426
- "id": 57562,
427
- "content": "</s_total_gross_worth>",
428
- "single_word": false,
429
- "lstrip": false,
430
- "rstrip": false,
431
- "normalized": true,
432
- "special": false
433
- },
434
- {
435
- "id": 57563,
436
- "content": "<s_None>",
437
- "single_word": false,
438
- "lstrip": false,
439
- "rstrip": false,
440
- "normalized": true,
441
- "special": false
442
- },
443
- {
444
- "id": 57564,
445
- "content": "</s_None>",
446
- "single_word": false,
447
- "lstrip": false,
448
- "rstrip": false,
449
- "normalized": true,
450
- "special": false
451
- },
452
- {
453
- "id": 57565,
454
- "content": "<s_cord-v2>",
455
  "single_word": false,
456
  "lstrip": false,
457
  "rstrip": false,
 
91
  },
92
  {
93
  "id": 57525,
94
+ "content": "<s_receipt>",
95
  "single_word": false,
96
  "lstrip": false,
97
  "rstrip": false,
 
100
  },
101
  {
102
  "id": 57526,
103
+ "content": "<s_store_name>",
104
  "single_word": false,
105
  "lstrip": false,
106
  "rstrip": false,
 
109
  },
110
  {
111
  "id": 57527,
112
+ "content": "</s_store_name>",
113
  "single_word": false,
114
  "lstrip": false,
115
  "rstrip": false,
 
118
  },
119
  {
120
  "id": 57528,
121
+ "content": "<s_store_addr>",
122
  "single_word": false,
123
  "lstrip": false,
124
  "rstrip": false,
 
127
  },
128
  {
129
  "id": 57529,
130
+ "content": "</s_store_addr>",
131
  "single_word": false,
132
  "lstrip": false,
133
  "rstrip": false,
 
136
  },
137
  {
138
  "id": 57530,
139
+ "content": "<s_telephone>",
140
  "single_word": false,
141
  "lstrip": false,
142
  "rstrip": false,
 
145
  },
146
  {
147
  "id": 57531,
148
+ "content": "</s_telephone>",
149
  "single_word": false,
150
  "lstrip": false,
151
  "rstrip": false,
 
154
  },
155
  {
156
  "id": 57532,
157
+ "content": "<s_date>",
158
  "single_word": false,
159
  "lstrip": false,
160
  "rstrip": false,
 
163
  },
164
  {
165
  "id": 57533,
166
+ "content": "</s_date>",
167
  "single_word": false,
168
  "lstrip": false,
169
  "rstrip": false,
 
172
  },
173
  {
174
  "id": 57534,
175
+ "content": "<s_time>",
176
  "single_word": false,
177
  "lstrip": false,
178
  "rstrip": false,
 
181
  },
182
  {
183
  "id": 57535,
184
+ "content": "</s_time>",
185
  "single_word": false,
186
  "lstrip": false,
187
  "rstrip": false,
 
190
  },
191
  {
192
  "id": 57536,
193
+ "content": "<s_subtotal>",
194
  "single_word": false,
195
  "lstrip": false,
196
  "rstrip": false,
 
199
  },
200
  {
201
  "id": 57537,
202
+ "content": "</s_subtotal>",
203
  "single_word": false,
204
  "lstrip": false,
205
  "rstrip": false,
 
208
  },
209
  {
210
  "id": 57538,
211
+ "content": "<s_tax>",
212
  "single_word": false,
213
  "lstrip": false,
214
  "rstrip": false,
 
217
  },
218
  {
219
  "id": 57539,
220
+ "content": "</s_tax>",
221
  "single_word": false,
222
  "lstrip": false,
223
  "rstrip": false,
 
226
  },
227
  {
228
  "id": 57540,
229
+ "content": "<s_total>",
230
  "single_word": false,
231
  "lstrip": false,
232
  "rstrip": false,
 
235
  },
236
  {
237
  "id": 57541,
238
+ "content": "</s_total>",
239
  "single_word": false,
240
  "lstrip": false,
241
  "rstrip": false,
 
244
  },
245
  {
246
  "id": 57542,
247
+ "content": "<s_ignore>",
248
  "single_word": false,
249
  "lstrip": false,
250
  "rstrip": false,
 
253
  },
254
  {
255
  "id": 57543,
256
+ "content": "</s_ignore>",
257
  "single_word": false,
258
  "lstrip": false,
259
  "rstrip": false,
 
262
  },
263
  {
264
  "id": 57544,
265
+ "content": "<s_tips>",
266
  "single_word": false,
267
  "lstrip": false,
268
  "rstrip": false,
 
271
  },
272
  {
273
  "id": 57545,
274
+ "content": "</s_tips>",
275
  "single_word": false,
276
  "lstrip": false,
277
  "rstrip": false,
 
280
  },
281
  {
282
  "id": 57546,
283
+ "content": "<s_line_items>",
284
  "single_word": false,
285
  "lstrip": false,
286
  "rstrip": false,
 
289
  },
290
  {
291
  "id": 57547,
292
+ "content": "</s_line_items>",
293
  "single_word": false,
294
  "lstrip": false,
295
  "rstrip": false,
 
298
  },
299
  {
300
  "id": 57548,
301
+ "content": "<s_item_key>",
302
  "single_word": false,
303
  "lstrip": false,
304
  "rstrip": false,
 
307
  },
308
  {
309
  "id": 57549,
310
+ "content": "</s_item_key>",
311
  "single_word": false,
312
  "lstrip": false,
313
  "rstrip": false,
 
316
  },
317
  {
318
  "id": 57550,
319
+ "content": "<s_item_name>",
320
  "single_word": false,
321
  "lstrip": false,
322
  "rstrip": false,
 
325
  },
326
  {
327
  "id": 57551,
328
+ "content": "</s_item_name>",
329
  "single_word": false,
330
  "lstrip": false,
331
  "rstrip": false,
 
334
  },
335
  {
336
  "id": 57552,
337
+ "content": "<s_item_value>",
338
  "single_word": false,
339
  "lstrip": false,
340
  "rstrip": false,
 
343
  },
344
  {
345
  "id": 57553,
346
+ "content": "</s_item_value>",
347
  "single_word": false,
348
  "lstrip": false,
349
  "rstrip": false,
 
352
  },
353
  {
354
  "id": 57554,
355
+ "content": "<s_item_quantity>",
356
  "single_word": false,
357
  "lstrip": false,
358
  "rstrip": false,
 
361
  },
362
  {
363
  "id": 57555,
364
+ "content": "</s_item_quantity>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
365
  "single_word": false,
366
  "lstrip": false,
367
  "rstrip": false,