Lakoc committed on
Commit
b1b453c
·
1 Parent(s): 7fd4de6

Upload tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +7 -0
  2. tokenizer.json +971 -0
  3. tokenizer_config.json +9 -0
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "mask_token": "<mask>",
5
+ "pad_token": "<pad>",
6
+ "unk_token": "<unk>"
7
+ }
tokenizer.json ADDED
@@ -0,0 +1,971 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "content": "<s>",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 1,
17
+ "content": "</s>",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ },
24
+ {
25
+ "id": 2,
26
+ "content": "<unk>",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
+ },
33
+ {
34
+ "id": 3,
35
+ "content": "<pad>",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
+ },
42
+ {
43
+ "id": 4,
44
+ "content": "<mask>",
45
+ "single_word": false,
46
+ "lstrip": false,
47
+ "rstrip": false,
48
+ "normalized": false,
49
+ "special": true
50
+ }
51
+ ],
52
+ "normalizer": {
53
+ "type": "Sequence",
54
+ "normalizers": [
55
+ {
56
+ "type": "Replace",
57
+ "pattern": {
58
+ "String": "``"
59
+ },
60
+ "content": "\""
61
+ },
62
+ {
63
+ "type": "Replace",
64
+ "pattern": {
65
+ "String": "''"
66
+ },
67
+ "content": "\""
68
+ },
69
+ {
70
+ "type": "Lowercase"
71
+ }
72
+ ]
73
+ },
74
+ "pre_tokenizer": {
75
+ "type": "Metaspace",
76
+ "replacement": "▁",
77
+ "add_prefix_space": true
78
+ },
79
+ "post_processor": {
80
+ "type": "TemplateProcessing",
81
+ "single": [
82
+ {
83
+ "SpecialToken": {
84
+ "id": "<s>",
85
+ "type_id": 0
86
+ }
87
+ },
88
+ {
89
+ "Sequence": {
90
+ "id": "A",
91
+ "type_id": 0
92
+ }
93
+ },
94
+ {
95
+ "SpecialToken": {
96
+ "id": "</s>",
97
+ "type_id": 0
98
+ }
99
+ }
100
+ ],
101
+ "pair": [
102
+ {
103
+ "SpecialToken": {
104
+ "id": "<s>",
105
+ "type_id": 0
106
+ }
107
+ },
108
+ {
109
+ "Sequence": {
110
+ "id": "A",
111
+ "type_id": 0
112
+ }
113
+ },
114
+ {
115
+ "SpecialToken": {
116
+ "id": "</s>",
117
+ "type_id": 0
118
+ }
119
+ },
120
+ {
121
+ "SpecialToken": {
122
+ "id": "<s>",
123
+ "type_id": 1
124
+ }
125
+ },
126
+ {
127
+ "Sequence": {
128
+ "id": "B",
129
+ "type_id": 1
130
+ }
131
+ },
132
+ {
133
+ "SpecialToken": {
134
+ "id": "</s>",
135
+ "type_id": 1
136
+ }
137
+ }
138
+ ],
139
+ "special_tokens": {
140
+ "</s>": {
141
+ "id": "</s>",
142
+ "ids": [
143
+ 1
144
+ ],
145
+ "tokens": [
146
+ "</s>"
147
+ ]
148
+ },
149
+ "<s>": {
150
+ "id": "<s>",
151
+ "ids": [
152
+ 0
153
+ ],
154
+ "tokens": [
155
+ "<s>"
156
+ ]
157
+ }
158
+ }
159
+ },
160
+ "decoder": {
161
+ "type": "Metaspace",
162
+ "replacement": "▁",
163
+ "add_prefix_space": true
164
+ },
165
+ "model": {
166
+ "type": "Unigram",
167
+ "unk_id": 2,
168
+ "vocab": [
169
+ [
170
+ "<s>",
171
+ 0.0
172
+ ],
173
+ [
174
+ "</s>",
175
+ 0.0
176
+ ],
177
+ [
178
+ "<unk>",
179
+ 0.0
180
+ ],
181
+ [
182
+ "<pad>",
183
+ 0.0
184
+ ],
185
+ [
186
+ "<mask>",
187
+ 0.0
188
+ ],
189
+ [
190
+ "▁",
191
+ -2.933881842641904
192
+ ],
193
+ [
194
+ ".",
195
+ -3.5477780942834354
196
+ ],
197
+ [
198
+ "t",
199
+ -3.589490346893994
200
+ ],
201
+ [
202
+ "a",
203
+ -3.644247019210544
204
+ ],
205
+ [
206
+ "m",
207
+ -3.6586101480214985
208
+ ],
209
+ [
210
+ "o",
211
+ -3.70095367793402
212
+ ],
213
+ [
214
+ "l",
215
+ -3.7651406443688806
216
+ ],
217
+ [
218
+ "d",
219
+ -3.8417028391607904
220
+ ],
221
+ [
222
+ "s",
223
+ -3.885144751638135
224
+ ],
225
+ [
226
+ "r",
227
+ -3.901295045785892
228
+ ],
229
+ [
230
+ "v",
231
+ -3.935602964994691
232
+ ],
233
+ [
234
+ "e",
235
+ -3.9940703496517287
236
+ ],
237
+ [
238
+ "á",
239
+ -4.033966857289286
240
+ ],
241
+ [
242
+ "u",
243
+ -4.05884556484637
244
+ ],
245
+ [
246
+ "i",
247
+ -4.1166233920812925
248
+ ],
249
+ [
250
+ "p",
251
+ -4.176589139748492
252
+ ],
253
+ [
254
+ "ní",
255
+ -4.279857833700591
256
+ ],
257
+ [
258
+ "k",
259
+ -4.289690282403042
260
+ ],
261
+ [
262
+ "í",
263
+ -4.293029126936448
264
+ ],
265
+ [
266
+ "ch",
267
+ -4.324255581534921
268
+ ],
269
+ [
270
+ "▁s",
271
+ -4.325405861628145
272
+ ],
273
+ [
274
+ "n",
275
+ -4.37776141455995
276
+ ],
277
+ [
278
+ "▁v",
279
+ -4.380004094093257
280
+ ],
281
+ [
282
+ "st",
283
+ -4.453846829326743
284
+ ],
285
+ [
286
+ "y",
287
+ -4.488741049922906
288
+ ],
289
+ [
290
+ "b",
291
+ -4.501608535676114
292
+ ],
293
+ [
294
+ "z",
295
+ -4.555001955831841
296
+ ],
297
+ [
298
+ "ě",
299
+ -4.595063469625593
300
+ ],
301
+ [
302
+ "▁a",
303
+ -4.6643161816163765
304
+ ],
305
+ [
306
+ "▁po",
307
+ -4.716152885007917
308
+ ],
309
+ [
310
+ "le",
311
+ -4.729191738166342
312
+ ],
313
+ [
314
+ "▁je",
315
+ -4.771673455593422
316
+ ],
317
+ [
318
+ "é",
319
+ -4.824781856505561
320
+ ],
321
+ [
322
+ "▁z",
323
+ -4.8481232618122245
324
+ ],
325
+ [
326
+ "▁na",
327
+ -4.9181854570660395
328
+ ],
329
+ [
330
+ "ra",
331
+ -4.9291603874766015
332
+ ],
333
+ [
334
+ "la",
335
+ -4.9390965232116475
336
+ ],
337
+ [
338
+ "▁se",
339
+ -4.940769771372215
340
+ ],
341
+ [
342
+ "ho",
343
+ -4.948526161364832
344
+ ],
345
+ [
346
+ "ů",
347
+ -4.960219219866243
348
+ ],
349
+ [
350
+ "č",
351
+ -4.97484752703166
352
+ ],
353
+ [
354
+ "h",
355
+ -4.975810635483684
356
+ ],
357
+ [
358
+ "ž",
359
+ -4.994197816023373
360
+ ],
361
+ [
362
+ "ně",
363
+ -5.0492874780193056
364
+ ],
365
+ [
366
+ "ce",
367
+ -5.055632153128059
368
+ ],
369
+ [
370
+ "▁t",
371
+ -5.067221321042702
372
+ ],
373
+ [
374
+ "c",
375
+ -5.103308475546731
376
+ ],
377
+ [
378
+ "j",
379
+ -5.113139030749984
380
+ ],
381
+ [
382
+ "▁k",
383
+ -5.121312549520173
384
+ ],
385
+ [
386
+ "li",
387
+ -5.158555052585243
388
+ ],
389
+ [
390
+ "to",
391
+ -5.187829843778575
392
+ ],
393
+ [
394
+ "te",
395
+ -5.199580018751819
396
+ ],
397
+ [
398
+ "ni",
399
+ -5.237632267398226
400
+ ],
401
+ [
402
+ "va",
403
+ -5.271035361443101
404
+ ],
405
+ [
406
+ "▁o",
407
+ -5.286563564192571
408
+ ],
409
+ [
410
+ "ou",
411
+ -5.299940532930255
412
+ ],
413
+ [
414
+ "▁do",
415
+ -5.315008911633093
416
+ ],
417
+ [
418
+ "▁b",
419
+ -5.321459316608349
420
+ ],
421
+ [
422
+ "lo",
423
+ -5.337670312261877
424
+ ],
425
+ [
426
+ "na",
427
+ -5.3707587757007245
428
+ ],
429
+ [
430
+ "▁pro",
431
+ -5.400835750750204
432
+ ],
433
+ [
434
+ "ro",
435
+ -5.407363420434973
436
+ ],
437
+ [
438
+ "š",
439
+ -5.410775069982266
440
+ ],
441
+ [
442
+ "▁d",
443
+ -5.416578704818147
444
+ ],
445
+ [
446
+ "ý",
447
+ -5.425608165733997
448
+ ],
449
+ [
450
+ "en",
451
+ -5.431546361498073
452
+ ],
453
+ [
454
+ "ře",
455
+ -5.446446776902486
456
+ ],
457
+ [
458
+ "g",
459
+ -5.472971854258994
460
+ ],
461
+ [
462
+ "▁ne",
463
+ -5.48942936029952
464
+ ],
465
+ [
466
+ "▁za",
467
+ -5.5360003618990365
468
+ ],
469
+ [
470
+ "em",
471
+ -5.540943553270047
472
+ ],
473
+ [
474
+ "ko",
475
+ -5.551449196510767
476
+ ],
477
+ [
478
+ "né",
479
+ -5.622361112385544
480
+ ],
481
+ [
482
+ "▁h",
483
+ -5.6239437580030085
484
+ ],
485
+ [
486
+ "de",
487
+ -5.643325386718073
488
+ ],
489
+ [
490
+ "ti",
491
+ -5.6447253918913765
492
+ ],
493
+ [
494
+ "▁byl",
495
+ -5.648337857155493
496
+ ],
497
+ [
498
+ "ka",
499
+ -5.654853359802825
500
+ ],
501
+ [
502
+ "ku",
503
+ -5.664006533642916
504
+ ],
505
+ [
506
+ "▁ve",
507
+ -5.725900255027681
508
+ ],
509
+ [
510
+ "ci",
511
+ -5.7649409188479614
512
+ ],
513
+ [
514
+ "ná",
515
+ -5.769164693906317
516
+ ],
517
+ [
518
+ "mi",
519
+ -5.774644808059395
520
+ ],
521
+ [
522
+ "▁ob",
523
+ -5.778962817957801
524
+ ],
525
+ [
526
+ "▁vy",
527
+ -5.789650303559062
528
+ ],
529
+ [
530
+ "jí",
531
+ -5.794495124613842
532
+ ],
533
+ [
534
+ "vě",
535
+ -5.798185586095333
536
+ ],
537
+ [
538
+ "me",
539
+ -5.80913158514987
540
+ ],
541
+ [
542
+ "ze",
543
+ -5.814166428372028
544
+ ],
545
+ [
546
+ "▁p",
547
+ -5.824208440583925
548
+ ],
549
+ [
550
+ "▁u",
551
+ -5.878757555875056
552
+ ],
553
+ [
554
+ ",",
555
+ -5.903152170013643
556
+ ],
557
+ [
558
+ "no",
559
+ -5.9057725403152395
560
+ ],
561
+ [
562
+ "ji",
563
+ -5.920428277572507
564
+ ],
565
+ [
566
+ "▁to",
567
+ -5.926340496642947
568
+ ],
569
+ [
570
+ "tu",
571
+ -5.927828549586592
572
+ ],
573
+ [
574
+ "ky",
575
+ -5.9371858287916615
576
+ ],
577
+ [
578
+ "or",
579
+ -5.946365129713133
580
+ ],
581
+ [
582
+ "ný",
583
+ -5.99112481218983
584
+ ],
585
+ [
586
+ "že",
587
+ -5.993760496634453
588
+ ],
589
+ [
590
+ "ské",
591
+ -6.004036828021916
592
+ ],
593
+ [
594
+ "ř",
595
+ -6.043049654958058
596
+ ],
597
+ [
598
+ "in",
599
+ -6.044865678749619
600
+ ],
601
+ [
602
+ "y.",
603
+ -6.064267907044521
604
+ ],
605
+ [
606
+ "ší",
607
+ -6.066469419035859
608
+ ],
609
+ [
610
+ "▁f",
611
+ -6.086007502509733
612
+ ],
613
+ [
614
+ "vá",
615
+ -6.0929776823686215
616
+ ],
617
+ [
618
+ "cí",
619
+ -6.096087103918272
620
+ ],
621
+ [
622
+ "di",
623
+ -6.1258326693684
624
+ ],
625
+ [
626
+ "ri",
627
+ -6.130335674191909
628
+ ],
629
+ [
630
+ "je",
631
+ -6.137264395372371
632
+ ],
633
+ [
634
+ "ne",
635
+ -6.1531957187534765
636
+ ],
637
+ [
638
+ "▁vý",
639
+ -6.156522965057197
640
+ ],
641
+ [
642
+ "▁ná",
643
+ -6.183729574743225
644
+ ],
645
+ [
646
+ "ří",
647
+ -6.188204941940626
648
+ ],
649
+ [
650
+ "▁pa",
651
+ -6.19758569914038
652
+ ],
653
+ [
654
+ "ny",
655
+ -6.204870892400333
656
+ ],
657
+ [
658
+ "ži",
659
+ -6.211094353405346
660
+ ],
661
+ [
662
+ "oval",
663
+ -6.239595170380871
664
+ ],
665
+ [
666
+ "f",
667
+ -6.286645633774842
668
+ ],
669
+ [
670
+ "še",
671
+ -6.287121444594022
672
+ ],
673
+ [
674
+ "▁ú",
675
+ -6.295293312777933
676
+ ],
677
+ [
678
+ "by",
679
+ -6.301525122964281
680
+ ],
681
+ [
682
+ "ru",
683
+ -6.30809270263456
684
+ ],
685
+ [
686
+ "▁od",
687
+ -6.319961842625641
688
+ ],
689
+ [
690
+ "▁zá",
691
+ -6.328547643992921
692
+ ],
693
+ [
694
+ "vi",
695
+ -6.346802728367797
696
+ ],
697
+ [
698
+ "▁ma",
699
+ -6.431912156242294
700
+ ],
701
+ [
702
+ "lu",
703
+ -6.432754126816632
704
+ ],
705
+ [
706
+ "uje",
707
+ -6.443190781270953
708
+ ],
709
+ [
710
+ "ové",
711
+ -6.467014285714815
712
+ ],
713
+ [
714
+ "nost",
715
+ -6.549975301817593
716
+ ],
717
+ [
718
+ "▁pří",
719
+ -6.5569172549306725
720
+ ],
721
+ [
722
+ "nu",
723
+ -6.570250699560692
724
+ ],
725
+ [
726
+ "▁roz",
727
+ -6.589732746829957
728
+ ],
729
+ [
730
+ "▁mo",
731
+ -6.5921275544955575
732
+ ],
733
+ [
734
+ "▁při",
735
+ -6.592289291694437
736
+ ],
737
+ [
738
+ "▁sta",
739
+ -6.604274711569781
740
+ ],
741
+ [
742
+ "▁jako",
743
+ -6.640631120618143
744
+ ],
745
+ [
746
+ "▁re",
747
+ -6.641361350548964
748
+ ],
749
+ [
750
+ "▁pře",
751
+ -6.645933416903996
752
+ ],
753
+ [
754
+ "ři",
755
+ -6.665728435048961
756
+ ],
757
+ [
758
+ "▁před",
759
+ -6.688395456755879
760
+ ],
761
+ [
762
+ "ován",
763
+ -6.6986357693587975
764
+ ],
765
+ [
766
+ "ský",
767
+ -6.7345275093187515
768
+ ],
769
+ [
770
+ "či",
771
+ -6.7556332887967985
772
+ ],
773
+ [
774
+ "stav",
775
+ -6.7565629986059985
776
+ ],
777
+ [
778
+ "▁také",
779
+ -6.784564208271911
780
+ ],
781
+ [
782
+ "ické",
783
+ -6.822473413154058
784
+ ],
785
+ [
786
+ "▁jsou",
787
+ -6.839600800409185
788
+ ],
789
+ [
790
+ "▁kon",
791
+ -6.856932521302764
792
+ ],
793
+ [
794
+ "\"",
795
+ -6.940649196032627
796
+ ],
797
+ [
798
+ "děl",
799
+ -7.012240797906627
800
+ ],
801
+ [
802
+ "▁tak",
803
+ -7.056427922232195
804
+ ],
805
+ [
806
+ "▁ja",
807
+ -7.059445413822217
808
+ ],
809
+ [
810
+ "cký",
811
+ -7.083248888291322
812
+ ],
813
+ [
814
+ "x",
815
+ -7.094945147217752
816
+ ],
817
+ [
818
+ "ň",
819
+ -7.16444676938674
820
+ ],
821
+ [
822
+ "▁však",
823
+ -7.188759209502638
824
+ ],
825
+ [
826
+ "ště",
827
+ -7.197130013703209
828
+ ],
829
+ [
830
+ "▁měst",
831
+ -7.2631846765783745
832
+ ],
833
+ [
834
+ "▁druh",
835
+ -7.282570677527317
836
+ ],
837
+ [
838
+ "▁měl",
839
+ -7.384552941270997
840
+ ],
841
+ [
842
+ "?",
843
+ -7.407692759728381
844
+ ],
845
+ [
846
+ "chází",
847
+ -7.435960685282815
848
+ ],
849
+ [
850
+ "▁mezi",
851
+ -7.507108909210451
852
+ ],
853
+ [
854
+ "w",
855
+ -7.535083601752331
856
+ ],
857
+ [
858
+ "▁několik",
859
+ -7.590293335462103
860
+ ],
861
+ [
862
+ "▁musí",
863
+ -7.677520603750372
864
+ ],
865
+ [
866
+ "▁později",
867
+ -7.718353421312045
868
+ ],
869
+ [
870
+ "▁evropsk",
871
+ -7.830696406547949
872
+ ],
873
+ [
874
+ "▁společn",
875
+ -7.874088576602077
876
+ ],
877
+ [
878
+ "ť",
879
+ -7.901054161965922
880
+ ],
881
+ [
882
+ "▁současn",
883
+ -7.9524684253043665
884
+ ],
885
+ [
886
+ "ď",
887
+ -8.011743748876743
888
+ ],
889
+ [
890
+ "ó",
891
+ -8.058627228696277
892
+ ],
893
+ [
894
+ "!",
895
+ -8.390840756999921
896
+ ],
897
+ [
898
+ "▁působil",
899
+ -8.398303443996245
900
+ ],
901
+ [
902
+ "▁univerzit",
903
+ -8.428723893618406
904
+ ],
905
+ [
906
+ "ú",
907
+ -8.69197148730532
908
+ ],
909
+ [
910
+ "“",
911
+ -8.798163432450156
912
+ ],
913
+ [
914
+ "-",
915
+ -9.81112772875104
916
+ ],
917
+ [
918
+ "–",
919
+ -10.093664746434053
920
+ ],
921
+ [
922
+ ";",
923
+ -10.766412245976962
924
+ ],
925
+ [
926
+ "ö",
927
+ -10.940654670219764
928
+ ],
929
+ [
930
+ "ü",
931
+ -11.276765781343222
932
+ ],
933
+ [
934
+ "/",
935
+ -12.36962292424032
936
+ ],
937
+ [
938
+ "è",
939
+ -12.369622924240325
940
+ ],
941
+ [
942
+ "ä",
943
+ -12.86962292424032
944
+ ],
945
+ [
946
+ ":",
947
+ -12.869622924240325
948
+ ],
949
+ [
950
+ "q",
951
+ -13.869522924240323
952
+ ],
953
+ [
954
+ "„",
955
+ -13.869622924240325
956
+ ],
957
+ [
958
+ "—",
959
+ -13.869622924240325
960
+ ],
961
+ [
962
+ "…",
963
+ -13.869622924240325
964
+ ],
965
+ [
966
+ "ï",
967
+ -13.869622924240325
968
+ ]
969
+ ]
970
+ }
971
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "mask_token": "<mask>",
5
+ "model_max_length": 1000000000000000019884624838656,
6
+ "pad_token": "<pad>",
7
+ "tokenizer_class": "PreTrainedTokenizerFast",
8
+ "unk_token": "<unk>"
9
+ }