deep_unigram_400 / tokenizer.json
GinnM's picture
Upload tokenizer
ffc31a2
raw
history blame
25.5 kB
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<pad>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "<cls>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "<sep>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "<unk>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 4,
"content": "<mask>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": null,
"post_processor": {
"type": "TemplateProcessing",
"single": [
{
"SpecialToken": {
"id": "<cls>",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "<sep>",
"type_id": 0
}
}
],
"pair": [
{
"SpecialToken": {
"id": "<cls>",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "<sep>",
"type_id": 0
}
},
{
"Sequence": {
"id": "B",
"type_id": 1
}
},
{
"SpecialToken": {
"id": "<sep>",
"type_id": 1
}
}
],
"special_tokens": {
"<cls>": {
"id": "<cls>",
"ids": [
1
],
"tokens": [
"<cls>"
]
},
"<sep>": {
"id": "<sep>",
"ids": [
2
],
"tokens": [
"<sep>"
]
}
}
},
"decoder": {
"type": "Metaspace",
"replacement": "▁",
"add_prefix_space": true
},
"model": {
"type": "Unigram",
"unk_id": 3,
"vocab": [
[
"<pad>",
0.0
],
[
"<cls>",
0.0
],
[
"<sep>",
0.0
],
[
"<unk>",
0.0
],
[
"<mask>",
0.0
],
[
"M",
-3.7554998222976135
],
[
"L",
-3.8113246143449064
],
[
"A",
-3.929452944578113
],
[
"S",
-4.070089441253058
],
[
"G",
-4.175578480960867
],
[
"V",
-4.19898007041788
],
[
"E",
-4.248625528580556
],
[
"R",
-4.30521371576674
],
[
"T",
-4.3547083562161415
],
[
"D",
-4.434345888765602
],
[
"I",
-4.449364926533452
],
[
"K",
-4.511332851739709
],
[
"P",
-4.619463281312422
],
[
"Q",
-4.738242292555999
],
[
"N",
-4.770953998366815
],
[
"F",
-4.800739863634963
],
[
"Y",
-5.073279733675532
],
[
"AA",
-5.180294994653282
],
[
"H",
-5.255313458493209
],
[
"AL",
-5.256136732538366
],
[
"SS",
-5.274707579045808
],
[
"RL",
-5.293291689480101
],
[
"SL",
-5.33268637310386
],
[
"EL",
-5.345462058887769
],
[
"AV",
-5.356852232583046
],
[
"VL",
-5.362587307262711
],
[
"LL",
-5.366658891903455
],
[
"LV",
-5.394608845347163
],
[
"LS",
-5.401963599430987
],
[
"TL",
-5.402168852817569
],
[
"GG",
-5.411615935455101
],
[
"AG",
-5.419236330541118
],
[
"RR",
-5.43339567319355
],
[
"DL",
-5.445033390482839
],
[
"TA",
-5.455873545323126
],
[
"LP",
-5.461310693560275
],
[
"LA",
-5.4628078134596905
],
[
"KK",
-5.467723860199339
],
[
"LR",
-5.4753318342191974
],
[
"GL",
-5.478202813114974
],
[
"VV",
-5.486694064384039
],
[
"EA",
-5.495578102292059
],
[
"SG",
-5.496119603547189
],
[
"EE",
-5.504754093460143
],
[
"LT",
-5.513857281259391
],
[
"AR",
-5.516661966843117
],
[
"SA",
-5.524242228572543
],
[
"VA",
-5.5316986916044115
],
[
"LG",
-5.5337598194532305
],
[
"AS",
-5.5350732190609975
],
[
"DG",
-5.559530821529599
],
[
"IL",
-5.567825680332113
],
[
"GA",
-5.573596685323061
],
[
"DA",
-5.5770985515777145
],
[
"LI",
-5.5815072523044424
],
[
"SV",
-5.588368091399506
],
[
"AT",
-5.591999886453596
],
[
"EK",
-5.593722991992374
],
[
"TG",
-5.599105360711524
],
[
"GV",
-5.605047704696927
],
[
"LK",
-5.608637369191923
],
[
"TV",
-5.6098757676491395
],
[
"RA",
-5.610922630242422
],
[
"VS",
-5.616186222903277
],
[
"LD",
-5.6204333179938555
],
[
"ST",
-5.625871192726656
],
[
"W",
-5.634342467170761
],
[
"KL",
-5.637251745823825
],
[
"LE",
-5.639530618547097
],
[
"AE",
-5.640876162056811
],
[
"PS",
-5.647705082707752
],
[
"TS",
-5.648888327721908
],
[
"PL",
-5.660867491980687
],
[
"GR",
-5.663121326009481
],
[
"C",
-5.665365480390037
],
[
"GS",
-5.685370240409375
],
[
"IS",
-5.687881935937016
],
[
"IA",
-5.690759818157099
],
[
"PA",
-5.693436832981103
],
[
"SP",
-5.696220479040221
],
[
"AD",
-5.699750321846256
],
[
"SI",
-5.7124832899981595
],
[
"ER",
-5.714473009446468
],
[
"DE",
-5.715725559713832
],
[
"FL",
-5.715911554859167
],
[
"QL",
-5.722289039124453
],
[
"EI",
-5.743299609400362
],
[
"LF",
-5.743837872065123
],
[
"VT",
-5.748080112587042
],
[
"KE",
-5.751729723846893
],
[
"NL",
-5.752026246133266
],
[
"TP",
-5.754766113814597
],
[
"AI",
-5.758179225776413
],
[
"PG",
-5.75835584857416
],
[
"DD",
-5.760260817034601
],
[
"TT",
-5.761019530561207
],
[
"PV",
-5.769378591783898
],
[
"LQ",
-5.784725130722899
],
[
"GT",
-5.785612796672989
],
[
"VG",
-5.786130386822109
],
[
"VE",
-5.78768926512479
],
[
"SR",
-5.788240316117591
],
[
"EV",
-5.802568621488563
],
[
"SD",
-5.805722285577948
],
[
"RS",
-5.810936894680548
],
[
"DV",
-5.811419800922868
],
[
"LN",
-5.817415663129307
],
[
"VD",
-5.83143015426864
],
[
"GE",
-5.833531193527051
],
[
"GD",
-5.8469033335289
],
[
"VR",
-5.856568344130652
],
[
"KI",
-5.8623043411537665
],
[
"GI",
-5.8630793373144385
],
[
"AP",
-5.879687708770435
],
[
"ID",
-5.883238271741968
],
[
"RV",
-5.8842699617547645
],
[
"FS",
-5.887360180304517
],
[
"PP",
-5.894810030801418
],
[
"IV",
-5.8956944349131355
],
[
"PE",
-5.896410951628802
],
[
"SE",
-5.896945777694583
],
[
"QA",
-5.906632141024321
],
[
"RG",
-5.9069757976541055
],
[
"IE",
-5.912724359358114
],
[
"TI",
-5.915714179370614
],
[
"RE",
-5.9162806686236085
],
[
"SF",
-5.91847158645221
],
[
"II",
-5.9188429023501765
],
[
"IG",
-5.9199993374731825
],
[
"YL",
-5.926873386536887
],
[
"KA",
-5.9281114292795625
],
[
"DS",
-5.928343424626654
],
[
"EG",
-5.933844733606746
],
[
"GK",
-5.936684971065571
],
[
"ED",
-5.938162601603889
],
[
"NG",
-5.93910486688115
],
[
"VI",
-5.942816252417309
],
[
"DI",
-5.947285889597431
],
[
"VP",
-5.972836552098425
],
[
"SK",
-5.9761669579291254
],
[
"ES",
-5.97636412952019
],
[
"IT",
-5.983356966596592
],
[
"KS",
-5.9863896394536
],
[
"PT",
-5.997915175139532
],
[
"ET",
-6.00229008899732
],
[
"SN",
-6.020063118633141
],
[
"RI",
-6.031790135244016
],
[
"PD",
-6.031819590375921
],
[
"NS",
-6.0363423767904525
],
[
"KN",
-6.0378774860056215
],
[
"IK",
-6.038751874410973
],
[
"NI",
-6.04008599014556
],
[
"RP",
-6.042044632263918
],
[
"AQ",
-6.043949572861646
],
[
"EN",
-6.0465051216437224
],
[
"AK",
-6.0592501887250645
],
[
"NN",
-6.060228044634828
],
[
"RK",
-6.064544989017104
],
[
"AF",
-6.0668568211048495
],
[
"KT",
-6.078252886845407
],
[
"DP",
-6.078383145369486
],
[
"KR",
-6.084014477311472
],
[
"DR",
-6.092330615859543
],
[
"TD",
-6.095090514806881
],
[
"FG",
-6.097526450694687
],
[
"GF",
-6.1047851609553945
],
[
"EQ",
-6.106277553984741
],
[
"IN",
-6.126815301246005
],
[
"KV",
-6.1295228278013205
],
[
"RT",
-6.1419010306979285
],
[
"FA",
-6.143750275918618
],
[
"VK",
-6.149790853604548
],
[
"TE",
-6.15066955038421
],
[
"RD",
-6.155114536833587
],
[
"VF",
-6.162870289918667
],
[
"HL",
-6.163398269812998
],
[
"NA",
-6.1666252322848205
],
[
"KD",
-6.167548918330581
],
[
"QR",
-6.167869692707821
],
[
"IP",
-6.173584512141391
],
[
"GN",
-6.175114211182342
],
[
"FV",
-6.18545702305768
],
[
"QQ",
-6.211504041727698
],
[
"FD",
-6.2137727853653235
],
[
"SQ",
-6.214353555094291
],
[
"PR",
-6.2224960874024475
],
[
"DF",
-6.228923889662058
],
[
"TR",
-6.25053741869719
],
[
"LY",
-6.253159945484992
],
[
"DT",
-6.25749099125704
],
[
"QS",
-6.260171750484636
],
[
"RQ",
-6.2643672420076015
],
[
"IR",
-6.266028032362399
],
[
"KG",
-6.2882724696317
],
[
"NK",
-6.288947565387883
],
[
"NV",
-6.296993183723982
],
[
"IF",
-6.305481212608115
],
[
"FI",
-6.306611439997543
],
[
"NP",
-6.309441732932735
],
[
"RF",
-6.314648488622488
],
[
"QV",
-6.317190660900295
],
[
"GY",
-6.325515148266389
],
[
"NT",
-6.328395239516608
],
[
"VN",
-6.329827511719881
],
[
"DK",
-6.334597017980267
],
[
"TF",
-6.342878033878224
],
[
"QE",
-6.351226011746206
],
[
"LH",
-6.351656675782058
],
[
"GQ",
-6.354052916696954
],
[
"FF",
-6.3541826185553845
],
[
"NE",
-6.360258091388044
],
[
"ND",
-6.365229138404789
],
[
"FT",
-6.365253534087179
],
[
"AN",
-6.367249297442358
],
[
"ML",
-6.381852180160003
],
[
"KP",
-6.391123142939115
],
[
"QK",
-6.394302149436534
],
[
"GP",
-6.401941325596475
],
[
"TN",
-6.403956547079261
],
[
"QP",
-6.404197553058374
],
[
"YS",
-6.410105673199096
],
[
"KQ",
-6.411802700041866
],
[
"MA",
-6.412768742193753
],
[
"QI",
-6.413327913534266
],
[
"TK",
-6.41566797713255
],
[
"QG",
-6.428423592913639
],
[
"SY",
-6.428843129611058
],
[
"YG",
-6.434995604164168
],
[
"QT",
-6.435745599477595
],
[
"EP",
-6.45972157047451
],
[
"FE",
-6.466723609342743
],
[
"VQ",
-6.472310471837574
],
[
"DN",
-6.489782494016833
],
[
"RN",
-6.515197740114932
],
[
"PI",
-6.528423633478061
],
[
"YA",
-6.528628464984113
],
[
"DY",
-6.5318047285495435
],
[
"YR",
-6.536953476351389
],
[
"IY",
-6.54378609550977
],
[
"EF",
-6.545145057417711
],
[
"PQ",
-6.54606492562564
],
[
"AY",
-6.553263294346047
],
[
"PF",
-6.573568517590495
],
[
"YD",
-6.577365144652282
],
[
"NR",
-6.593834310209083
],
[
"VY",
-6.602079353168435
],
[
"FN",
-6.6099051506269255
],
[
"HP",
-6.614479894686015
],
[
"NF",
-6.618948003679922
],
[
"IQ",
-6.61991161209901
],
[
"HA",
-6.622695806762524
],
[
"HG",
-6.624021656499233
],
[
"YV",
-6.628001659591339
],
[
"RY",
-6.630277874220473
],
[
"YF",
-6.63049863655667
],
[
"KY",
-6.631618502412113
],
[
"HR",
-6.63180572066536
],
[
"PK",
-6.638343331559687
],
[
"QN",
-6.642169547520918
],
[
"FR",
-6.645194669612367
],
[
"WL",
-6.649716632288298
],
[
"TQ",
-6.650068339454506
],
[
"NQ",
-6.661808602433574
],
[
"TY",
-6.666546291153427
],
[
"YI",
-6.68117730985826
],
[
"EY",
-6.6825439322110505
],
[
"MS",
-6.685659264332063
],
[
"FK",
-6.686876384621602
],
[
"RH",
-6.692943384891196
],
[
"NY",
-6.693161966088065
],
[
"YT",
-6.696314470581575
],
[
"HS",
-6.699896880261978
],
[
"CL",
-6.7278709776314916
],
[
"MK",
-6.729158869863069
],
[
"YE",
-6.740237162404444
],
[
"PN",
-6.748556004293558
],
[
"DQ",
-6.752494016401274
],
[
"YN",
-6.758084059443041
],
[
"FY",
-6.771219560164635
],
[
"SH",
-6.771929997984971
],
[
"YY",
-6.7785275039670445
],
[
"AH",
-6.784066277504401
],
[
"LC",
-6.795331846955635
],
[
"QD",
-6.799556672133816
],
[
"FP",
-6.799860912828933
],
[
"GH",
-6.816256158703377
],
[
"CG",
-6.822009516354923
],
[
"MT",
-6.826099637598528
],
[
"YK",
-6.83604407670324
],
[
"CS",
-6.840754043865282
],
[
"HV",
-6.856865180562982
],
[
"KF",
-6.858301434538392
],
[
"MV",
-6.891389523832441
],
[
"MR",
-6.905406326458888
],
[
"ME",
-6.934576424556978
],
[
"AAA",
-6.939558197014293
],
[
"YQ",
-6.944059094050768
],
[
"GW",
-6.948908342056194
],
[
"VH",
-6.963592021699016
],
[
"EH",
-6.982434093097101
],
[
"YP",
-6.9922748746909775
],
[
"MG",
-7.0009021061388115
],
[
"SC",
-7.018886742535736
],
[
"PY",
-7.019895159128948
],
[
"MP",
-7.02164299981605
],
[
"WR",
-7.021948635726993
],
[
"MI",
-7.033112682916391
],
[
"HI",
-7.0338288373640605
],
[
"AW",
-7.049906854643689
],
[
"QF",
-7.052730107782535
],
[
"LW",
-7.055909108577195
],
[
"IH",
-7.05696157008979
],
[
"HT",
-7.05933115883659
],
[
"FQ",
-7.064156386962637
],
[
"HF",
-7.081535750783493
],
[
"AC",
-7.08460715565576
],
[
"MD",
-7.091113763016661
],
[
"HD",
-7.093823136459317
],
[
"HH",
-7.100003869785056
],
[
"HE",
-7.115880068784694
],
[
"HQ",
-7.116088208154894
],
[
"RW",
-7.119183575891418
],
[
"TH",
-7.124049547598624
],
[
"PH",
-7.135916194697529
],
[
"MN",
-7.138628413603188
],
[
"QH",
-7.148495617436538
],
[
"QY",
-7.1494242906863565
],
[
"VC",
-7.153299875141329
],
[
"SSS",
-7.15338252469782
],
[
"GC",
-7.162905262469382
],
[
"CR",
-7.165901201911245
],
[
"DH",
-7.177146233283761
],
[
"CA",
-7.189058532078878
],
[
"SW",
-7.191191579586656
],
[
"KH",
-7.208616246230131
],
[
"WS",
-7.234254173429806
],
[
"CV",
-7.242676847889237
],
[
"CP",
-7.278505440314529
],
[
"ALA",
-7.285379876992367
],
[
"IC",
-7.311679838116248
],
[
"DW",
-7.323604216170073
],
[
"WA",
-7.324603069949896
],
[
"RC",
-7.327320003688586
],
[
"NH",
-7.369652102632804
],
[
"FH",
-7.389105632206146
],
[
"HY",
-7.394552973439673
],
[
"VW",
-7.410391432310593
],
[
"LAA",
-7.422581014571122
],
[
"CD",
-7.4236718949362
],
[
"WT",
-7.429297576201453
],
[
"TC",
-7.432660167900888
],
[
"WV",
-7.448373753644203
],
[
"LLL",
-7.468807734079539
],
[
"CT",
-7.470533862026013
],
[
"WG",
-7.475248237654963
],
[
"WI",
-7.489124905202404
],
[
"TW",
-7.490498305455075
],
[
"LLA",
-7.49525893718625
],
[
"HK",
-7.495881789270555
],
[
"WK",
-7.498282114004681
],
[
"YH",
-7.503052514793961
],
[
"GGG",
-7.505862398990461
],
[
"EW",
-7.509287835578606
],
[
"FC",
-7.510894437432199
],
[
"CI",
-7.520762931857856
],
[
"CF",
-7.526143989437605
],
[
"HN",
-7.527737798339141
],
[
"AAL",
-7.559490847705204
],
[
"CE",
-7.5719621985460766
],
[
"DC",
-7.58937449185575
],
[
"PPP",
-7.590470441236201
],
[
"AAG",
-7.598108173051893
],
[
"WQ",
-7.603633426616071
],
[
"WD",
-7.647404262508854
],
[
"WN",
-7.656861213585055
],
[
"CK",
-7.657141392229375
],
[
"WE",
-7.687892619934072
],
[
"PW",
-7.6958759883785
],
[
"EC",
-7.701522174207833
],
[
"NC",
-7.724614716610493
],
[
"AVA",
-7.727330821827604
],
[
"ALL",
-7.739526229609453
],
[
"KC",
-7.75549154972774
],
[
"FW",
-7.758506176574047
],
[
"LLS",
-7.774993372051432
],
[
"YC",
-7.784691819693933
],
[
"WF",
-7.802790805396517
],
[
"SLS",
-7.820351896521961
],
[
"CC",
-7.821005914910156
],
[
"VAA",
-7.840678509005382
],
[
"X",
-8.060884118618663
],
[
"B",
-14.06671018811785
],
[
"Z",
-15.293675060227184
],
[
"U",
-15.973126178825922
],
[
"O",
-19.46823425707293
]
]
}
}