GinnM committed on
Commit
ffc31a2
1 Parent(s): 2d4d24f

Upload tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +9 -0
  2. tokenizer.json +1740 -0
  3. tokenizer_config.json +5 -0
special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<cls>",
3
+ "cls_token": "<cls>",
4
+ "eos_token": "<sep>",
5
+ "mask_token": "<mask>",
6
+ "pad_token": "<pad>",
7
+ "sep_token": "<sep>",
8
+ "unk_token": "<unk>"
9
+ }
tokenizer.json ADDED
@@ -0,0 +1,1740 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "content": "<pad>",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 1,
17
+ "content": "<cls>",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ },
24
+ {
25
+ "id": 2,
26
+ "content": "<sep>",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
+ },
33
+ {
34
+ "id": 3,
35
+ "content": "<unk>",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
+ },
42
+ {
43
+ "id": 4,
44
+ "content": "<mask>",
45
+ "single_word": false,
46
+ "lstrip": false,
47
+ "rstrip": false,
48
+ "normalized": false,
49
+ "special": true
50
+ }
51
+ ],
52
+ "normalizer": null,
53
+ "pre_tokenizer": null,
54
+ "post_processor": {
55
+ "type": "TemplateProcessing",
56
+ "single": [
57
+ {
58
+ "SpecialToken": {
59
+ "id": "<cls>",
60
+ "type_id": 0
61
+ }
62
+ },
63
+ {
64
+ "Sequence": {
65
+ "id": "A",
66
+ "type_id": 0
67
+ }
68
+ },
69
+ {
70
+ "SpecialToken": {
71
+ "id": "<sep>",
72
+ "type_id": 0
73
+ }
74
+ }
75
+ ],
76
+ "pair": [
77
+ {
78
+ "SpecialToken": {
79
+ "id": "<cls>",
80
+ "type_id": 0
81
+ }
82
+ },
83
+ {
84
+ "Sequence": {
85
+ "id": "A",
86
+ "type_id": 0
87
+ }
88
+ },
89
+ {
90
+ "SpecialToken": {
91
+ "id": "<sep>",
92
+ "type_id": 0
93
+ }
94
+ },
95
+ {
96
+ "Sequence": {
97
+ "id": "B",
98
+ "type_id": 1
99
+ }
100
+ },
101
+ {
102
+ "SpecialToken": {
103
+ "id": "<sep>",
104
+ "type_id": 1
105
+ }
106
+ }
107
+ ],
108
+ "special_tokens": {
109
+ "<cls>": {
110
+ "id": "<cls>",
111
+ "ids": [
112
+ 1
113
+ ],
114
+ "tokens": [
115
+ "<cls>"
116
+ ]
117
+ },
118
+ "<sep>": {
119
+ "id": "<sep>",
120
+ "ids": [
121
+ 2
122
+ ],
123
+ "tokens": [
124
+ "<sep>"
125
+ ]
126
+ }
127
+ }
128
+ },
129
+ "decoder": {
130
+ "type": "Metaspace",
131
+ "replacement": "▁",
132
+ "add_prefix_space": true
133
+ },
134
+ "model": {
135
+ "type": "Unigram",
136
+ "unk_id": 3,
137
+ "vocab": [
138
+ [
139
+ "<pad>",
140
+ 0.0
141
+ ],
142
+ [
143
+ "<cls>",
144
+ 0.0
145
+ ],
146
+ [
147
+ "<sep>",
148
+ 0.0
149
+ ],
150
+ [
151
+ "<unk>",
152
+ 0.0
153
+ ],
154
+ [
155
+ "<mask>",
156
+ 0.0
157
+ ],
158
+ [
159
+ "M",
160
+ -3.7554998222976135
161
+ ],
162
+ [
163
+ "L",
164
+ -3.8113246143449064
165
+ ],
166
+ [
167
+ "A",
168
+ -3.929452944578113
169
+ ],
170
+ [
171
+ "S",
172
+ -4.070089441253058
173
+ ],
174
+ [
175
+ "G",
176
+ -4.175578480960867
177
+ ],
178
+ [
179
+ "V",
180
+ -4.19898007041788
181
+ ],
182
+ [
183
+ "E",
184
+ -4.248625528580556
185
+ ],
186
+ [
187
+ "R",
188
+ -4.30521371576674
189
+ ],
190
+ [
191
+ "T",
192
+ -4.3547083562161415
193
+ ],
194
+ [
195
+ "D",
196
+ -4.434345888765602
197
+ ],
198
+ [
199
+ "I",
200
+ -4.449364926533452
201
+ ],
202
+ [
203
+ "K",
204
+ -4.511332851739709
205
+ ],
206
+ [
207
+ "P",
208
+ -4.619463281312422
209
+ ],
210
+ [
211
+ "Q",
212
+ -4.738242292555999
213
+ ],
214
+ [
215
+ "N",
216
+ -4.770953998366815
217
+ ],
218
+ [
219
+ "F",
220
+ -4.800739863634963
221
+ ],
222
+ [
223
+ "Y",
224
+ -5.073279733675532
225
+ ],
226
+ [
227
+ "AA",
228
+ -5.180294994653282
229
+ ],
230
+ [
231
+ "H",
232
+ -5.255313458493209
233
+ ],
234
+ [
235
+ "AL",
236
+ -5.256136732538366
237
+ ],
238
+ [
239
+ "SS",
240
+ -5.274707579045808
241
+ ],
242
+ [
243
+ "RL",
244
+ -5.293291689480101
245
+ ],
246
+ [
247
+ "SL",
248
+ -5.33268637310386
249
+ ],
250
+ [
251
+ "EL",
252
+ -5.345462058887769
253
+ ],
254
+ [
255
+ "AV",
256
+ -5.356852232583046
257
+ ],
258
+ [
259
+ "VL",
260
+ -5.362587307262711
261
+ ],
262
+ [
263
+ "LL",
264
+ -5.366658891903455
265
+ ],
266
+ [
267
+ "LV",
268
+ -5.394608845347163
269
+ ],
270
+ [
271
+ "LS",
272
+ -5.401963599430987
273
+ ],
274
+ [
275
+ "TL",
276
+ -5.402168852817569
277
+ ],
278
+ [
279
+ "GG",
280
+ -5.411615935455101
281
+ ],
282
+ [
283
+ "AG",
284
+ -5.419236330541118
285
+ ],
286
+ [
287
+ "RR",
288
+ -5.43339567319355
289
+ ],
290
+ [
291
+ "DL",
292
+ -5.445033390482839
293
+ ],
294
+ [
295
+ "TA",
296
+ -5.455873545323126
297
+ ],
298
+ [
299
+ "LP",
300
+ -5.461310693560275
301
+ ],
302
+ [
303
+ "LA",
304
+ -5.4628078134596905
305
+ ],
306
+ [
307
+ "KK",
308
+ -5.467723860199339
309
+ ],
310
+ [
311
+ "LR",
312
+ -5.4753318342191974
313
+ ],
314
+ [
315
+ "GL",
316
+ -5.478202813114974
317
+ ],
318
+ [
319
+ "VV",
320
+ -5.486694064384039
321
+ ],
322
+ [
323
+ "EA",
324
+ -5.495578102292059
325
+ ],
326
+ [
327
+ "SG",
328
+ -5.496119603547189
329
+ ],
330
+ [
331
+ "EE",
332
+ -5.504754093460143
333
+ ],
334
+ [
335
+ "LT",
336
+ -5.513857281259391
337
+ ],
338
+ [
339
+ "AR",
340
+ -5.516661966843117
341
+ ],
342
+ [
343
+ "SA",
344
+ -5.524242228572543
345
+ ],
346
+ [
347
+ "VA",
348
+ -5.5316986916044115
349
+ ],
350
+ [
351
+ "LG",
352
+ -5.5337598194532305
353
+ ],
354
+ [
355
+ "AS",
356
+ -5.5350732190609975
357
+ ],
358
+ [
359
+ "DG",
360
+ -5.559530821529599
361
+ ],
362
+ [
363
+ "IL",
364
+ -5.567825680332113
365
+ ],
366
+ [
367
+ "GA",
368
+ -5.573596685323061
369
+ ],
370
+ [
371
+ "DA",
372
+ -5.5770985515777145
373
+ ],
374
+ [
375
+ "LI",
376
+ -5.5815072523044424
377
+ ],
378
+ [
379
+ "SV",
380
+ -5.588368091399506
381
+ ],
382
+ [
383
+ "AT",
384
+ -5.591999886453596
385
+ ],
386
+ [
387
+ "EK",
388
+ -5.593722991992374
389
+ ],
390
+ [
391
+ "TG",
392
+ -5.599105360711524
393
+ ],
394
+ [
395
+ "GV",
396
+ -5.605047704696927
397
+ ],
398
+ [
399
+ "LK",
400
+ -5.608637369191923
401
+ ],
402
+ [
403
+ "TV",
404
+ -5.6098757676491395
405
+ ],
406
+ [
407
+ "RA",
408
+ -5.610922630242422
409
+ ],
410
+ [
411
+ "VS",
412
+ -5.616186222903277
413
+ ],
414
+ [
415
+ "LD",
416
+ -5.6204333179938555
417
+ ],
418
+ [
419
+ "ST",
420
+ -5.625871192726656
421
+ ],
422
+ [
423
+ "W",
424
+ -5.634342467170761
425
+ ],
426
+ [
427
+ "KL",
428
+ -5.637251745823825
429
+ ],
430
+ [
431
+ "LE",
432
+ -5.639530618547097
433
+ ],
434
+ [
435
+ "AE",
436
+ -5.640876162056811
437
+ ],
438
+ [
439
+ "PS",
440
+ -5.647705082707752
441
+ ],
442
+ [
443
+ "TS",
444
+ -5.648888327721908
445
+ ],
446
+ [
447
+ "PL",
448
+ -5.660867491980687
449
+ ],
450
+ [
451
+ "GR",
452
+ -5.663121326009481
453
+ ],
454
+ [
455
+ "C",
456
+ -5.665365480390037
457
+ ],
458
+ [
459
+ "GS",
460
+ -5.685370240409375
461
+ ],
462
+ [
463
+ "IS",
464
+ -5.687881935937016
465
+ ],
466
+ [
467
+ "IA",
468
+ -5.690759818157099
469
+ ],
470
+ [
471
+ "PA",
472
+ -5.693436832981103
473
+ ],
474
+ [
475
+ "SP",
476
+ -5.696220479040221
477
+ ],
478
+ [
479
+ "AD",
480
+ -5.699750321846256
481
+ ],
482
+ [
483
+ "SI",
484
+ -5.7124832899981595
485
+ ],
486
+ [
487
+ "ER",
488
+ -5.714473009446468
489
+ ],
490
+ [
491
+ "DE",
492
+ -5.715725559713832
493
+ ],
494
+ [
495
+ "FL",
496
+ -5.715911554859167
497
+ ],
498
+ [
499
+ "QL",
500
+ -5.722289039124453
501
+ ],
502
+ [
503
+ "EI",
504
+ -5.743299609400362
505
+ ],
506
+ [
507
+ "LF",
508
+ -5.743837872065123
509
+ ],
510
+ [
511
+ "VT",
512
+ -5.748080112587042
513
+ ],
514
+ [
515
+ "KE",
516
+ -5.751729723846893
517
+ ],
518
+ [
519
+ "NL",
520
+ -5.752026246133266
521
+ ],
522
+ [
523
+ "TP",
524
+ -5.754766113814597
525
+ ],
526
+ [
527
+ "AI",
528
+ -5.758179225776413
529
+ ],
530
+ [
531
+ "PG",
532
+ -5.75835584857416
533
+ ],
534
+ [
535
+ "DD",
536
+ -5.760260817034601
537
+ ],
538
+ [
539
+ "TT",
540
+ -5.761019530561207
541
+ ],
542
+ [
543
+ "PV",
544
+ -5.769378591783898
545
+ ],
546
+ [
547
+ "LQ",
548
+ -5.784725130722899
549
+ ],
550
+ [
551
+ "GT",
552
+ -5.785612796672989
553
+ ],
554
+ [
555
+ "VG",
556
+ -5.786130386822109
557
+ ],
558
+ [
559
+ "VE",
560
+ -5.78768926512479
561
+ ],
562
+ [
563
+ "SR",
564
+ -5.788240316117591
565
+ ],
566
+ [
567
+ "EV",
568
+ -5.802568621488563
569
+ ],
570
+ [
571
+ "SD",
572
+ -5.805722285577948
573
+ ],
574
+ [
575
+ "RS",
576
+ -5.810936894680548
577
+ ],
578
+ [
579
+ "DV",
580
+ -5.811419800922868
581
+ ],
582
+ [
583
+ "LN",
584
+ -5.817415663129307
585
+ ],
586
+ [
587
+ "VD",
588
+ -5.83143015426864
589
+ ],
590
+ [
591
+ "GE",
592
+ -5.833531193527051
593
+ ],
594
+ [
595
+ "GD",
596
+ -5.8469033335289
597
+ ],
598
+ [
599
+ "VR",
600
+ -5.856568344130652
601
+ ],
602
+ [
603
+ "KI",
604
+ -5.8623043411537665
605
+ ],
606
+ [
607
+ "GI",
608
+ -5.8630793373144385
609
+ ],
610
+ [
611
+ "AP",
612
+ -5.879687708770435
613
+ ],
614
+ [
615
+ "ID",
616
+ -5.883238271741968
617
+ ],
618
+ [
619
+ "RV",
620
+ -5.8842699617547645
621
+ ],
622
+ [
623
+ "FS",
624
+ -5.887360180304517
625
+ ],
626
+ [
627
+ "PP",
628
+ -5.894810030801418
629
+ ],
630
+ [
631
+ "IV",
632
+ -5.8956944349131355
633
+ ],
634
+ [
635
+ "PE",
636
+ -5.896410951628802
637
+ ],
638
+ [
639
+ "SE",
640
+ -5.896945777694583
641
+ ],
642
+ [
643
+ "QA",
644
+ -5.906632141024321
645
+ ],
646
+ [
647
+ "RG",
648
+ -5.9069757976541055
649
+ ],
650
+ [
651
+ "IE",
652
+ -5.912724359358114
653
+ ],
654
+ [
655
+ "TI",
656
+ -5.915714179370614
657
+ ],
658
+ [
659
+ "RE",
660
+ -5.9162806686236085
661
+ ],
662
+ [
663
+ "SF",
664
+ -5.91847158645221
665
+ ],
666
+ [
667
+ "II",
668
+ -5.9188429023501765
669
+ ],
670
+ [
671
+ "IG",
672
+ -5.9199993374731825
673
+ ],
674
+ [
675
+ "YL",
676
+ -5.926873386536887
677
+ ],
678
+ [
679
+ "KA",
680
+ -5.9281114292795625
681
+ ],
682
+ [
683
+ "DS",
684
+ -5.928343424626654
685
+ ],
686
+ [
687
+ "EG",
688
+ -5.933844733606746
689
+ ],
690
+ [
691
+ "GK",
692
+ -5.936684971065571
693
+ ],
694
+ [
695
+ "ED",
696
+ -5.938162601603889
697
+ ],
698
+ [
699
+ "NG",
700
+ -5.93910486688115
701
+ ],
702
+ [
703
+ "VI",
704
+ -5.942816252417309
705
+ ],
706
+ [
707
+ "DI",
708
+ -5.947285889597431
709
+ ],
710
+ [
711
+ "VP",
712
+ -5.972836552098425
713
+ ],
714
+ [
715
+ "SK",
716
+ -5.9761669579291254
717
+ ],
718
+ [
719
+ "ES",
720
+ -5.97636412952019
721
+ ],
722
+ [
723
+ "IT",
724
+ -5.983356966596592
725
+ ],
726
+ [
727
+ "KS",
728
+ -5.9863896394536
729
+ ],
730
+ [
731
+ "PT",
732
+ -5.997915175139532
733
+ ],
734
+ [
735
+ "ET",
736
+ -6.00229008899732
737
+ ],
738
+ [
739
+ "SN",
740
+ -6.020063118633141
741
+ ],
742
+ [
743
+ "RI",
744
+ -6.031790135244016
745
+ ],
746
+ [
747
+ "PD",
748
+ -6.031819590375921
749
+ ],
750
+ [
751
+ "NS",
752
+ -6.0363423767904525
753
+ ],
754
+ [
755
+ "KN",
756
+ -6.0378774860056215
757
+ ],
758
+ [
759
+ "IK",
760
+ -6.038751874410973
761
+ ],
762
+ [
763
+ "NI",
764
+ -6.04008599014556
765
+ ],
766
+ [
767
+ "RP",
768
+ -6.042044632263918
769
+ ],
770
+ [
771
+ "AQ",
772
+ -6.043949572861646
773
+ ],
774
+ [
775
+ "EN",
776
+ -6.0465051216437224
777
+ ],
778
+ [
779
+ "AK",
780
+ -6.0592501887250645
781
+ ],
782
+ [
783
+ "NN",
784
+ -6.060228044634828
785
+ ],
786
+ [
787
+ "RK",
788
+ -6.064544989017104
789
+ ],
790
+ [
791
+ "AF",
792
+ -6.0668568211048495
793
+ ],
794
+ [
795
+ "KT",
796
+ -6.078252886845407
797
+ ],
798
+ [
799
+ "DP",
800
+ -6.078383145369486
801
+ ],
802
+ [
803
+ "KR",
804
+ -6.084014477311472
805
+ ],
806
+ [
807
+ "DR",
808
+ -6.092330615859543
809
+ ],
810
+ [
811
+ "TD",
812
+ -6.095090514806881
813
+ ],
814
+ [
815
+ "FG",
816
+ -6.097526450694687
817
+ ],
818
+ [
819
+ "GF",
820
+ -6.1047851609553945
821
+ ],
822
+ [
823
+ "EQ",
824
+ -6.106277553984741
825
+ ],
826
+ [
827
+ "IN",
828
+ -6.126815301246005
829
+ ],
830
+ [
831
+ "KV",
832
+ -6.1295228278013205
833
+ ],
834
+ [
835
+ "RT",
836
+ -6.1419010306979285
837
+ ],
838
+ [
839
+ "FA",
840
+ -6.143750275918618
841
+ ],
842
+ [
843
+ "VK",
844
+ -6.149790853604548
845
+ ],
846
+ [
847
+ "TE",
848
+ -6.15066955038421
849
+ ],
850
+ [
851
+ "RD",
852
+ -6.155114536833587
853
+ ],
854
+ [
855
+ "VF",
856
+ -6.162870289918667
857
+ ],
858
+ [
859
+ "HL",
860
+ -6.163398269812998
861
+ ],
862
+ [
863
+ "NA",
864
+ -6.1666252322848205
865
+ ],
866
+ [
867
+ "KD",
868
+ -6.167548918330581
869
+ ],
870
+ [
871
+ "QR",
872
+ -6.167869692707821
873
+ ],
874
+ [
875
+ "IP",
876
+ -6.173584512141391
877
+ ],
878
+ [
879
+ "GN",
880
+ -6.175114211182342
881
+ ],
882
+ [
883
+ "FV",
884
+ -6.18545702305768
885
+ ],
886
+ [
887
+ "QQ",
888
+ -6.211504041727698
889
+ ],
890
+ [
891
+ "FD",
892
+ -6.2137727853653235
893
+ ],
894
+ [
895
+ "SQ",
896
+ -6.214353555094291
897
+ ],
898
+ [
899
+ "PR",
900
+ -6.2224960874024475
901
+ ],
902
+ [
903
+ "DF",
904
+ -6.228923889662058
905
+ ],
906
+ [
907
+ "TR",
908
+ -6.25053741869719
909
+ ],
910
+ [
911
+ "LY",
912
+ -6.253159945484992
913
+ ],
914
+ [
915
+ "DT",
916
+ -6.25749099125704
917
+ ],
918
+ [
919
+ "QS",
920
+ -6.260171750484636
921
+ ],
922
+ [
923
+ "RQ",
924
+ -6.2643672420076015
925
+ ],
926
+ [
927
+ "IR",
928
+ -6.266028032362399
929
+ ],
930
+ [
931
+ "KG",
932
+ -6.2882724696317
933
+ ],
934
+ [
935
+ "NK",
936
+ -6.288947565387883
937
+ ],
938
+ [
939
+ "NV",
940
+ -6.296993183723982
941
+ ],
942
+ [
943
+ "IF",
944
+ -6.305481212608115
945
+ ],
946
+ [
947
+ "FI",
948
+ -6.306611439997543
949
+ ],
950
+ [
951
+ "NP",
952
+ -6.309441732932735
953
+ ],
954
+ [
955
+ "RF",
956
+ -6.314648488622488
957
+ ],
958
+ [
959
+ "QV",
960
+ -6.317190660900295
961
+ ],
962
+ [
963
+ "GY",
964
+ -6.325515148266389
965
+ ],
966
+ [
967
+ "NT",
968
+ -6.328395239516608
969
+ ],
970
+ [
971
+ "VN",
972
+ -6.329827511719881
973
+ ],
974
+ [
975
+ "DK",
976
+ -6.334597017980267
977
+ ],
978
+ [
979
+ "TF",
980
+ -6.342878033878224
981
+ ],
982
+ [
983
+ "QE",
984
+ -6.351226011746206
985
+ ],
986
+ [
987
+ "LH",
988
+ -6.351656675782058
989
+ ],
990
+ [
991
+ "GQ",
992
+ -6.354052916696954
993
+ ],
994
+ [
995
+ "FF",
996
+ -6.3541826185553845
997
+ ],
998
+ [
999
+ "NE",
1000
+ -6.360258091388044
1001
+ ],
1002
+ [
1003
+ "ND",
1004
+ -6.365229138404789
1005
+ ],
1006
+ [
1007
+ "FT",
1008
+ -6.365253534087179
1009
+ ],
1010
+ [
1011
+ "AN",
1012
+ -6.367249297442358
1013
+ ],
1014
+ [
1015
+ "ML",
1016
+ -6.381852180160003
1017
+ ],
1018
+ [
1019
+ "KP",
1020
+ -6.391123142939115
1021
+ ],
1022
+ [
1023
+ "QK",
1024
+ -6.394302149436534
1025
+ ],
1026
+ [
1027
+ "GP",
1028
+ -6.401941325596475
1029
+ ],
1030
+ [
1031
+ "TN",
1032
+ -6.403956547079261
1033
+ ],
1034
+ [
1035
+ "QP",
1036
+ -6.404197553058374
1037
+ ],
1038
+ [
1039
+ "YS",
1040
+ -6.410105673199096
1041
+ ],
1042
+ [
1043
+ "KQ",
1044
+ -6.411802700041866
1045
+ ],
1046
+ [
1047
+ "MA",
1048
+ -6.412768742193753
1049
+ ],
1050
+ [
1051
+ "QI",
1052
+ -6.413327913534266
1053
+ ],
1054
+ [
1055
+ "TK",
1056
+ -6.41566797713255
1057
+ ],
1058
+ [
1059
+ "QG",
1060
+ -6.428423592913639
1061
+ ],
1062
+ [
1063
+ "SY",
1064
+ -6.428843129611058
1065
+ ],
1066
+ [
1067
+ "YG",
1068
+ -6.434995604164168
1069
+ ],
1070
+ [
1071
+ "QT",
1072
+ -6.435745599477595
1073
+ ],
1074
+ [
1075
+ "EP",
1076
+ -6.45972157047451
1077
+ ],
1078
+ [
1079
+ "FE",
1080
+ -6.466723609342743
1081
+ ],
1082
+ [
1083
+ "VQ",
1084
+ -6.472310471837574
1085
+ ],
1086
+ [
1087
+ "DN",
1088
+ -6.489782494016833
1089
+ ],
1090
+ [
1091
+ "RN",
1092
+ -6.515197740114932
1093
+ ],
1094
+ [
1095
+ "PI",
1096
+ -6.528423633478061
1097
+ ],
1098
+ [
1099
+ "YA",
1100
+ -6.528628464984113
1101
+ ],
1102
+ [
1103
+ "DY",
1104
+ -6.5318047285495435
1105
+ ],
1106
+ [
1107
+ "YR",
1108
+ -6.536953476351389
1109
+ ],
1110
+ [
1111
+ "IY",
1112
+ -6.54378609550977
1113
+ ],
1114
+ [
1115
+ "EF",
1116
+ -6.545145057417711
1117
+ ],
1118
+ [
1119
+ "PQ",
1120
+ -6.54606492562564
1121
+ ],
1122
+ [
1123
+ "AY",
1124
+ -6.553263294346047
1125
+ ],
1126
+ [
1127
+ "PF",
1128
+ -6.573568517590495
1129
+ ],
1130
+ [
1131
+ "YD",
1132
+ -6.577365144652282
1133
+ ],
1134
+ [
1135
+ "NR",
1136
+ -6.593834310209083
1137
+ ],
1138
+ [
1139
+ "VY",
1140
+ -6.602079353168435
1141
+ ],
1142
+ [
1143
+ "FN",
1144
+ -6.6099051506269255
1145
+ ],
1146
+ [
1147
+ "HP",
1148
+ -6.614479894686015
1149
+ ],
1150
+ [
1151
+ "NF",
1152
+ -6.618948003679922
1153
+ ],
1154
+ [
1155
+ "IQ",
1156
+ -6.61991161209901
1157
+ ],
1158
+ [
1159
+ "HA",
1160
+ -6.622695806762524
1161
+ ],
1162
+ [
1163
+ "HG",
1164
+ -6.624021656499233
1165
+ ],
1166
+ [
1167
+ "YV",
1168
+ -6.628001659591339
1169
+ ],
1170
+ [
1171
+ "RY",
1172
+ -6.630277874220473
1173
+ ],
1174
+ [
1175
+ "YF",
1176
+ -6.63049863655667
1177
+ ],
1178
+ [
1179
+ "KY",
1180
+ -6.631618502412113
1181
+ ],
1182
+ [
1183
+ "HR",
1184
+ -6.63180572066536
1185
+ ],
1186
+ [
1187
+ "PK",
1188
+ -6.638343331559687
1189
+ ],
1190
+ [
1191
+ "QN",
1192
+ -6.642169547520918
1193
+ ],
1194
+ [
1195
+ "FR",
1196
+ -6.645194669612367
1197
+ ],
1198
+ [
1199
+ "WL",
1200
+ -6.649716632288298
1201
+ ],
1202
+ [
1203
+ "TQ",
1204
+ -6.650068339454506
1205
+ ],
1206
+ [
1207
+ "NQ",
1208
+ -6.661808602433574
1209
+ ],
1210
+ [
1211
+ "TY",
1212
+ -6.666546291153427
1213
+ ],
1214
+ [
1215
+ "YI",
1216
+ -6.68117730985826
1217
+ ],
1218
+ [
1219
+ "EY",
1220
+ -6.6825439322110505
1221
+ ],
1222
+ [
1223
+ "MS",
1224
+ -6.685659264332063
1225
+ ],
1226
+ [
1227
+ "FK",
1228
+ -6.686876384621602
1229
+ ],
1230
+ [
1231
+ "RH",
1232
+ -6.692943384891196
1233
+ ],
1234
+ [
1235
+ "NY",
1236
+ -6.693161966088065
1237
+ ],
1238
+ [
1239
+ "YT",
1240
+ -6.696314470581575
1241
+ ],
1242
+ [
1243
+ "HS",
1244
+ -6.699896880261978
1245
+ ],
1246
+ [
1247
+ "CL",
1248
+ -6.7278709776314916
1249
+ ],
1250
+ [
1251
+ "MK",
1252
+ -6.729158869863069
1253
+ ],
1254
+ [
1255
+ "YE",
1256
+ -6.740237162404444
1257
+ ],
1258
+ [
1259
+ "PN",
1260
+ -6.748556004293558
1261
+ ],
1262
+ [
1263
+ "DQ",
1264
+ -6.752494016401274
1265
+ ],
1266
+ [
1267
+ "YN",
1268
+ -6.758084059443041
1269
+ ],
1270
+ [
1271
+ "FY",
1272
+ -6.771219560164635
1273
+ ],
1274
+ [
1275
+ "SH",
1276
+ -6.771929997984971
1277
+ ],
1278
+ [
1279
+ "YY",
1280
+ -6.7785275039670445
1281
+ ],
1282
+ [
1283
+ "AH",
1284
+ -6.784066277504401
1285
+ ],
1286
+ [
1287
+ "LC",
1288
+ -6.795331846955635
1289
+ ],
1290
+ [
1291
+ "QD",
1292
+ -6.799556672133816
1293
+ ],
1294
+ [
1295
+ "FP",
1296
+ -6.799860912828933
1297
+ ],
1298
+ [
1299
+ "GH",
1300
+ -6.816256158703377
1301
+ ],
1302
+ [
1303
+ "CG",
1304
+ -6.822009516354923
1305
+ ],
1306
+ [
1307
+ "MT",
1308
+ -6.826099637598528
1309
+ ],
1310
+ [
1311
+ "YK",
1312
+ -6.83604407670324
1313
+ ],
1314
+ [
1315
+ "CS",
1316
+ -6.840754043865282
1317
+ ],
1318
+ [
1319
+ "HV",
1320
+ -6.856865180562982
1321
+ ],
1322
+ [
1323
+ "KF",
1324
+ -6.858301434538392
1325
+ ],
1326
+ [
1327
+ "MV",
1328
+ -6.891389523832441
1329
+ ],
1330
+ [
1331
+ "MR",
1332
+ -6.905406326458888
1333
+ ],
1334
+ [
1335
+ "ME",
1336
+ -6.934576424556978
1337
+ ],
1338
+ [
1339
+ "AAA",
1340
+ -6.939558197014293
1341
+ ],
1342
+ [
1343
+ "YQ",
1344
+ -6.944059094050768
1345
+ ],
1346
+ [
1347
+ "GW",
1348
+ -6.948908342056194
1349
+ ],
1350
+ [
1351
+ "VH",
1352
+ -6.963592021699016
1353
+ ],
1354
+ [
1355
+ "EH",
1356
+ -6.982434093097101
1357
+ ],
1358
+ [
1359
+ "YP",
1360
+ -6.9922748746909775
1361
+ ],
1362
+ [
1363
+ "MG",
1364
+ -7.0009021061388115
1365
+ ],
1366
+ [
1367
+ "SC",
1368
+ -7.018886742535736
1369
+ ],
1370
+ [
1371
+ "PY",
1372
+ -7.019895159128948
1373
+ ],
1374
+ [
1375
+ "MP",
1376
+ -7.02164299981605
1377
+ ],
1378
+ [
1379
+ "WR",
1380
+ -7.021948635726993
1381
+ ],
1382
+ [
1383
+ "MI",
1384
+ -7.033112682916391
1385
+ ],
1386
+ [
1387
+ "HI",
1388
+ -7.0338288373640605
1389
+ ],
1390
+ [
1391
+ "AW",
1392
+ -7.049906854643689
1393
+ ],
1394
+ [
1395
+ "QF",
1396
+ -7.052730107782535
1397
+ ],
1398
+ [
1399
+ "LW",
1400
+ -7.055909108577195
1401
+ ],
1402
+ [
1403
+ "IH",
1404
+ -7.05696157008979
1405
+ ],
1406
+ [
1407
+ "HT",
1408
+ -7.05933115883659
1409
+ ],
1410
+ [
1411
+ "FQ",
1412
+ -7.064156386962637
1413
+ ],
1414
+ [
1415
+ "HF",
1416
+ -7.081535750783493
1417
+ ],
1418
+ [
1419
+ "AC",
1420
+ -7.08460715565576
1421
+ ],
1422
+ [
1423
+ "MD",
1424
+ -7.091113763016661
1425
+ ],
1426
+ [
1427
+ "HD",
1428
+ -7.093823136459317
1429
+ ],
1430
+ [
1431
+ "HH",
1432
+ -7.100003869785056
1433
+ ],
1434
+ [
1435
+ "HE",
1436
+ -7.115880068784694
1437
+ ],
1438
+ [
1439
+ "HQ",
1440
+ -7.116088208154894
1441
+ ],
1442
+ [
1443
+ "RW",
1444
+ -7.119183575891418
1445
+ ],
1446
+ [
1447
+ "TH",
1448
+ -7.124049547598624
1449
+ ],
1450
+ [
1451
+ "PH",
1452
+ -7.135916194697529
1453
+ ],
1454
+ [
1455
+ "MN",
1456
+ -7.138628413603188
1457
+ ],
1458
+ [
1459
+ "QH",
1460
+ -7.148495617436538
1461
+ ],
1462
+ [
1463
+ "QY",
1464
+ -7.1494242906863565
1465
+ ],
1466
+ [
1467
+ "VC",
1468
+ -7.153299875141329
1469
+ ],
1470
+ [
1471
+ "SSS",
1472
+ -7.15338252469782
1473
+ ],
1474
+ [
1475
+ "GC",
1476
+ -7.162905262469382
1477
+ ],
1478
+ [
1479
+ "CR",
1480
+ -7.165901201911245
1481
+ ],
1482
+ [
1483
+ "DH",
1484
+ -7.177146233283761
1485
+ ],
1486
+ [
1487
+ "CA",
1488
+ -7.189058532078878
1489
+ ],
1490
+ [
1491
+ "SW",
1492
+ -7.191191579586656
1493
+ ],
1494
+ [
1495
+ "KH",
1496
+ -7.208616246230131
1497
+ ],
1498
+ [
1499
+ "WS",
1500
+ -7.234254173429806
1501
+ ],
1502
+ [
1503
+ "CV",
1504
+ -7.242676847889237
1505
+ ],
1506
+ [
1507
+ "CP",
1508
+ -7.278505440314529
1509
+ ],
1510
+ [
1511
+ "ALA",
1512
+ -7.285379876992367
1513
+ ],
1514
+ [
1515
+ "IC",
1516
+ -7.311679838116248
1517
+ ],
1518
+ [
1519
+ "DW",
1520
+ -7.323604216170073
1521
+ ],
1522
+ [
1523
+ "WA",
1524
+ -7.324603069949896
1525
+ ],
1526
+ [
1527
+ "RC",
1528
+ -7.327320003688586
1529
+ ],
1530
+ [
1531
+ "NH",
1532
+ -7.369652102632804
1533
+ ],
1534
+ [
1535
+ "FH",
1536
+ -7.389105632206146
1537
+ ],
1538
+ [
1539
+ "HY",
1540
+ -7.394552973439673
1541
+ ],
1542
+ [
1543
+ "VW",
1544
+ -7.410391432310593
1545
+ ],
1546
+ [
1547
+ "LAA",
1548
+ -7.422581014571122
1549
+ ],
1550
+ [
1551
+ "CD",
1552
+ -7.4236718949362
1553
+ ],
1554
+ [
1555
+ "WT",
1556
+ -7.429297576201453
1557
+ ],
1558
+ [
1559
+ "TC",
1560
+ -7.432660167900888
1561
+ ],
1562
+ [
1563
+ "WV",
1564
+ -7.448373753644203
1565
+ ],
1566
+ [
1567
+ "LLL",
1568
+ -7.468807734079539
1569
+ ],
1570
+ [
1571
+ "CT",
1572
+ -7.470533862026013
1573
+ ],
1574
+ [
1575
+ "WG",
1576
+ -7.475248237654963
1577
+ ],
1578
+ [
1579
+ "WI",
1580
+ -7.489124905202404
1581
+ ],
1582
+ [
1583
+ "TW",
1584
+ -7.490498305455075
1585
+ ],
1586
+ [
1587
+ "LLA",
1588
+ -7.49525893718625
1589
+ ],
1590
+ [
1591
+ "HK",
1592
+ -7.495881789270555
1593
+ ],
1594
+ [
1595
+ "WK",
1596
+ -7.498282114004681
1597
+ ],
1598
+ [
1599
+ "YH",
1600
+ -7.503052514793961
1601
+ ],
1602
+ [
1603
+ "GGG",
1604
+ -7.505862398990461
1605
+ ],
1606
+ [
1607
+ "EW",
1608
+ -7.509287835578606
1609
+ ],
1610
+ [
1611
+ "FC",
1612
+ -7.510894437432199
1613
+ ],
1614
+ [
1615
+ "CI",
1616
+ -7.520762931857856
1617
+ ],
1618
+ [
1619
+ "CF",
1620
+ -7.526143989437605
1621
+ ],
1622
+ [
1623
+ "HN",
1624
+ -7.527737798339141
1625
+ ],
1626
+ [
1627
+ "AAL",
1628
+ -7.559490847705204
1629
+ ],
1630
+ [
1631
+ "CE",
1632
+ -7.5719621985460766
1633
+ ],
1634
+ [
1635
+ "DC",
1636
+ -7.58937449185575
1637
+ ],
1638
+ [
1639
+ "PPP",
1640
+ -7.590470441236201
1641
+ ],
1642
+ [
1643
+ "AAG",
1644
+ -7.598108173051893
1645
+ ],
1646
+ [
1647
+ "WQ",
1648
+ -7.603633426616071
1649
+ ],
1650
+ [
1651
+ "WD",
1652
+ -7.647404262508854
1653
+ ],
1654
+ [
1655
+ "WN",
1656
+ -7.656861213585055
1657
+ ],
1658
+ [
1659
+ "CK",
1660
+ -7.657141392229375
1661
+ ],
1662
+ [
1663
+ "WE",
1664
+ -7.687892619934072
1665
+ ],
1666
+ [
1667
+ "PW",
1668
+ -7.6958759883785
1669
+ ],
1670
+ [
1671
+ "EC",
1672
+ -7.701522174207833
1673
+ ],
1674
+ [
1675
+ "NC",
1676
+ -7.724614716610493
1677
+ ],
1678
+ [
1679
+ "AVA",
1680
+ -7.727330821827604
1681
+ ],
1682
+ [
1683
+ "ALL",
1684
+ -7.739526229609453
1685
+ ],
1686
+ [
1687
+ "KC",
1688
+ -7.75549154972774
1689
+ ],
1690
+ [
1691
+ "FW",
1692
+ -7.758506176574047
1693
+ ],
1694
+ [
1695
+ "LLS",
1696
+ -7.774993372051432
1697
+ ],
1698
+ [
1699
+ "YC",
1700
+ -7.784691819693933
1701
+ ],
1702
+ [
1703
+ "WF",
1704
+ -7.802790805396517
1705
+ ],
1706
+ [
1707
+ "SLS",
1708
+ -7.820351896521961
1709
+ ],
1710
+ [
1711
+ "CC",
1712
+ -7.821005914910156
1713
+ ],
1714
+ [
1715
+ "VAA",
1716
+ -7.840678509005382
1717
+ ],
1718
+ [
1719
+ "X",
1720
+ -8.060884118618663
1721
+ ],
1722
+ [
1723
+ "B",
1724
+ -14.06671018811785
1725
+ ],
1726
+ [
1727
+ "Z",
1728
+ -15.293675060227184
1729
+ ],
1730
+ [
1731
+ "U",
1732
+ -15.973126178825922
1733
+ ],
1734
+ [
1735
+ "O",
1736
+ -19.46823425707293
1737
+ ]
1738
+ ]
1739
+ }
1740
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "clean_up_tokenization_spaces": true,
3
+ "model_max_length": 1000000000000000019884624838656,
4
+ "tokenizer_class": "PreTrainedTokenizerFast"
5
+ }