Files changed (2) hide show
  1. added_tokens.json +5 -41
  2. tokenizer.json +4 -328
added_tokens.json CHANGED
@@ -1,46 +1,10 @@
1
  {
2
- "</s_None>": 57564,
3
- "</s_client>": 57534,
4
- "</s_client_tax_id>": 57538,
5
- "</s_header>": 57526,
6
- "</s_iban>": 57540,
7
- "</s_invoice_date>": 57530,
8
- "</s_invoice_no>": 57528,
9
- "</s_item_desc>": 57544,
10
- "</s_item_gross_worth>": 57554,
11
- "</s_item_net_price>": 57548,
12
- "</s_item_net_worth>": 57550,
13
- "</s_item_qty>": 57546,
14
- "</s_item_vat>": 57552,
15
- "</s_items>": 57542,
16
- "</s_seller>": 57532,
17
- "</s_seller_tax_id>": 57536,
18
- "</s_summary>": 57556,
19
- "</s_total_gross_worth>": 57562,
20
- "</s_total_net_worth>": 57558,
21
- "</s_total_vat>": 57560,
22
- "<s_None>": 57563,
23
- "<s_client>": 57533,
24
- "<s_client_tax_id>": 57537,
25
- "<s_cord-v2>": 57565,
26
- "<s_header>": 57525,
27
- "<s_iban>": 57539,
28
  "<s_iitcdip>": 57523,
29
- "<s_invoice_date>": 57529,
30
- "<s_invoice_no>": 57527,
31
- "<s_item_desc>": 57543,
32
- "<s_item_gross_worth>": 57553,
33
- "<s_item_net_price>": 57547,
34
- "<s_item_net_worth>": 57549,
35
- "<s_item_qty>": 57545,
36
- "<s_item_vat>": 57551,
37
- "<s_items>": 57541,
38
- "<s_seller>": 57531,
39
- "<s_seller_tax_id>": 57535,
40
- "<s_summary>": 57555,
41
  "<s_synthdog>": 57524,
42
- "<s_total_gross_worth>": 57561,
43
- "<s_total_net_worth>": 57557,
44
- "<s_total_vat>": 57559,
45
  "<sep/>": 57522
46
  }
 
1
  {
2
+ "</s_activite>": 57526,
3
+ "</s_date_imm>": 57528,
4
+ "<s_activite>": 57525,
5
+ "<s_cord-v2>": 57529,
6
+ "<s_date_imm>": 57527,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  "<s_iitcdip>": 57523,
 
 
 
 
 
 
 
 
 
 
 
 
8
  "<s_synthdog>": 57524,
 
 
 
9
  "<sep/>": 57522
10
  }
tokenizer.json CHANGED
@@ -91,7 +91,7 @@
91
  },
92
  {
93
  "id": 57525,
94
- "content": "<s_header>",
95
  "single_word": false,
96
  "lstrip": false,
97
  "rstrip": false,
@@ -100,7 +100,7 @@
100
  },
101
  {
102
  "id": 57526,
103
- "content": "</s_header>",
104
  "single_word": false,
105
  "lstrip": false,
106
  "rstrip": false,
@@ -109,7 +109,7 @@
109
  },
110
  {
111
  "id": 57527,
112
- "content": "<s_invoice_no>",
113
  "single_word": false,
114
  "lstrip": false,
115
  "rstrip": false,
@@ -118,7 +118,7 @@
118
  },
119
  {
120
  "id": 57528,
121
- "content": "</s_invoice_no>",
122
  "single_word": false,
123
  "lstrip": false,
124
  "rstrip": false,
@@ -127,330 +127,6 @@
127
  },
128
  {
129
  "id": 57529,
130
- "content": "<s_invoice_date>",
131
- "single_word": false,
132
- "lstrip": false,
133
- "rstrip": false,
134
- "normalized": true,
135
- "special": false
136
- },
137
- {
138
- "id": 57530,
139
- "content": "</s_invoice_date>",
140
- "single_word": false,
141
- "lstrip": false,
142
- "rstrip": false,
143
- "normalized": true,
144
- "special": false
145
- },
146
- {
147
- "id": 57531,
148
- "content": "<s_seller>",
149
- "single_word": false,
150
- "lstrip": false,
151
- "rstrip": false,
152
- "normalized": true,
153
- "special": false
154
- },
155
- {
156
- "id": 57532,
157
- "content": "</s_seller>",
158
- "single_word": false,
159
- "lstrip": false,
160
- "rstrip": false,
161
- "normalized": true,
162
- "special": false
163
- },
164
- {
165
- "id": 57533,
166
- "content": "<s_client>",
167
- "single_word": false,
168
- "lstrip": false,
169
- "rstrip": false,
170
- "normalized": true,
171
- "special": false
172
- },
173
- {
174
- "id": 57534,
175
- "content": "</s_client>",
176
- "single_word": false,
177
- "lstrip": false,
178
- "rstrip": false,
179
- "normalized": true,
180
- "special": false
181
- },
182
- {
183
- "id": 57535,
184
- "content": "<s_seller_tax_id>",
185
- "single_word": false,
186
- "lstrip": false,
187
- "rstrip": false,
188
- "normalized": true,
189
- "special": false
190
- },
191
- {
192
- "id": 57536,
193
- "content": "</s_seller_tax_id>",
194
- "single_word": false,
195
- "lstrip": false,
196
- "rstrip": false,
197
- "normalized": true,
198
- "special": false
199
- },
200
- {
201
- "id": 57537,
202
- "content": "<s_client_tax_id>",
203
- "single_word": false,
204
- "lstrip": false,
205
- "rstrip": false,
206
- "normalized": true,
207
- "special": false
208
- },
209
- {
210
- "id": 57538,
211
- "content": "</s_client_tax_id>",
212
- "single_word": false,
213
- "lstrip": false,
214
- "rstrip": false,
215
- "normalized": true,
216
- "special": false
217
- },
218
- {
219
- "id": 57539,
220
- "content": "<s_iban>",
221
- "single_word": false,
222
- "lstrip": false,
223
- "rstrip": false,
224
- "normalized": true,
225
- "special": false
226
- },
227
- {
228
- "id": 57540,
229
- "content": "</s_iban>",
230
- "single_word": false,
231
- "lstrip": false,
232
- "rstrip": false,
233
- "normalized": true,
234
- "special": false
235
- },
236
- {
237
- "id": 57541,
238
- "content": "<s_items>",
239
- "single_word": false,
240
- "lstrip": false,
241
- "rstrip": false,
242
- "normalized": true,
243
- "special": false
244
- },
245
- {
246
- "id": 57542,
247
- "content": "</s_items>",
248
- "single_word": false,
249
- "lstrip": false,
250
- "rstrip": false,
251
- "normalized": true,
252
- "special": false
253
- },
254
- {
255
- "id": 57543,
256
- "content": "<s_item_desc>",
257
- "single_word": false,
258
- "lstrip": false,
259
- "rstrip": false,
260
- "normalized": true,
261
- "special": false
262
- },
263
- {
264
- "id": 57544,
265
- "content": "</s_item_desc>",
266
- "single_word": false,
267
- "lstrip": false,
268
- "rstrip": false,
269
- "normalized": true,
270
- "special": false
271
- },
272
- {
273
- "id": 57545,
274
- "content": "<s_item_qty>",
275
- "single_word": false,
276
- "lstrip": false,
277
- "rstrip": false,
278
- "normalized": true,
279
- "special": false
280
- },
281
- {
282
- "id": 57546,
283
- "content": "</s_item_qty>",
284
- "single_word": false,
285
- "lstrip": false,
286
- "rstrip": false,
287
- "normalized": true,
288
- "special": false
289
- },
290
- {
291
- "id": 57547,
292
- "content": "<s_item_net_price>",
293
- "single_word": false,
294
- "lstrip": false,
295
- "rstrip": false,
296
- "normalized": true,
297
- "special": false
298
- },
299
- {
300
- "id": 57548,
301
- "content": "</s_item_net_price>",
302
- "single_word": false,
303
- "lstrip": false,
304
- "rstrip": false,
305
- "normalized": true,
306
- "special": false
307
- },
308
- {
309
- "id": 57549,
310
- "content": "<s_item_net_worth>",
311
- "single_word": false,
312
- "lstrip": false,
313
- "rstrip": false,
314
- "normalized": true,
315
- "special": false
316
- },
317
- {
318
- "id": 57550,
319
- "content": "</s_item_net_worth>",
320
- "single_word": false,
321
- "lstrip": false,
322
- "rstrip": false,
323
- "normalized": true,
324
- "special": false
325
- },
326
- {
327
- "id": 57551,
328
- "content": "<s_item_vat>",
329
- "single_word": false,
330
- "lstrip": false,
331
- "rstrip": false,
332
- "normalized": true,
333
- "special": false
334
- },
335
- {
336
- "id": 57552,
337
- "content": "</s_item_vat>",
338
- "single_word": false,
339
- "lstrip": false,
340
- "rstrip": false,
341
- "normalized": true,
342
- "special": false
343
- },
344
- {
345
- "id": 57553,
346
- "content": "<s_item_gross_worth>",
347
- "single_word": false,
348
- "lstrip": false,
349
- "rstrip": false,
350
- "normalized": true,
351
- "special": false
352
- },
353
- {
354
- "id": 57554,
355
- "content": "</s_item_gross_worth>",
356
- "single_word": false,
357
- "lstrip": false,
358
- "rstrip": false,
359
- "normalized": true,
360
- "special": false
361
- },
362
- {
363
- "id": 57555,
364
- "content": "<s_summary>",
365
- "single_word": false,
366
- "lstrip": false,
367
- "rstrip": false,
368
- "normalized": true,
369
- "special": false
370
- },
371
- {
372
- "id": 57556,
373
- "content": "</s_summary>",
374
- "single_word": false,
375
- "lstrip": false,
376
- "rstrip": false,
377
- "normalized": true,
378
- "special": false
379
- },
380
- {
381
- "id": 57557,
382
- "content": "<s_total_net_worth>",
383
- "single_word": false,
384
- "lstrip": false,
385
- "rstrip": false,
386
- "normalized": true,
387
- "special": false
388
- },
389
- {
390
- "id": 57558,
391
- "content": "</s_total_net_worth>",
392
- "single_word": false,
393
- "lstrip": false,
394
- "rstrip": false,
395
- "normalized": true,
396
- "special": false
397
- },
398
- {
399
- "id": 57559,
400
- "content": "<s_total_vat>",
401
- "single_word": false,
402
- "lstrip": false,
403
- "rstrip": false,
404
- "normalized": true,
405
- "special": false
406
- },
407
- {
408
- "id": 57560,
409
- "content": "</s_total_vat>",
410
- "single_word": false,
411
- "lstrip": false,
412
- "rstrip": false,
413
- "normalized": true,
414
- "special": false
415
- },
416
- {
417
- "id": 57561,
418
- "content": "<s_total_gross_worth>",
419
- "single_word": false,
420
- "lstrip": false,
421
- "rstrip": false,
422
- "normalized": true,
423
- "special": false
424
- },
425
- {
426
- "id": 57562,
427
- "content": "</s_total_gross_worth>",
428
- "single_word": false,
429
- "lstrip": false,
430
- "rstrip": false,
431
- "normalized": true,
432
- "special": false
433
- },
434
- {
435
- "id": 57563,
436
- "content": "<s_None>",
437
- "single_word": false,
438
- "lstrip": false,
439
- "rstrip": false,
440
- "normalized": true,
441
- "special": false
442
- },
443
- {
444
- "id": 57564,
445
- "content": "</s_None>",
446
- "single_word": false,
447
- "lstrip": false,
448
- "rstrip": false,
449
- "normalized": true,
450
- "special": false
451
- },
452
- {
453
- "id": 57565,
454
  "content": "<s_cord-v2>",
455
  "single_word": false,
456
  "lstrip": false,
 
91
  },
92
  {
93
  "id": 57525,
94
+ "content": "<s_activite>",
95
  "single_word": false,
96
  "lstrip": false,
97
  "rstrip": false,
 
100
  },
101
  {
102
  "id": 57526,
103
+ "content": "</s_activite>",
104
  "single_word": false,
105
  "lstrip": false,
106
  "rstrip": false,
 
109
  },
110
  {
111
  "id": 57527,
112
+ "content": "<s_date_imm>",
113
  "single_word": false,
114
  "lstrip": false,
115
  "rstrip": false,
 
118
  },
119
  {
120
  "id": 57528,
121
+ "content": "</s_date_imm>",
122
  "single_word": false,
123
  "lstrip": false,
124
  "rstrip": false,
 
127
  },
128
  {
129
  "id": 57529,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  "content": "<s_cord-v2>",
131
  "single_word": false,
132
  "lstrip": false,