deep_unigram_50 / tokenizer.json
GinnM's picture
Upload tokenizer
84ed39b
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<pad>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "<cls>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "<sep>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "<unk>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 4,
"content": "<mask>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": null,
"post_processor": {
"type": "TemplateProcessing",
"single": [
{
"SpecialToken": {
"id": "<cls>",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "<sep>",
"type_id": 0
}
}
],
"pair": [
{
"SpecialToken": {
"id": "<cls>",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "<sep>",
"type_id": 0
}
},
{
"Sequence": {
"id": "B",
"type_id": 1
}
},
{
"SpecialToken": {
"id": "<sep>",
"type_id": 1
}
}
],
"special_tokens": {
"<cls>": {
"id": "<cls>",
"ids": [
1
],
"tokens": [
"<cls>"
]
},
"<sep>": {
"id": "<sep>",
"ids": [
2
],
"tokens": [
"<sep>"
]
}
}
},
"decoder": {
"type": "Metaspace",
"replacement": "▁",
"add_prefix_space": true
},
"model": {
"type": "Unigram",
"unk_id": 3,
"vocab": [
[
"<pad>",
0.0
],
[
"<cls>",
0.0
],
[
"<sep>",
0.0
],
[
"<unk>",
0.0
],
[
"<mask>",
0.0
],
[
"L",
-2.7509312510529185
],
[
"R",
-2.755084699217022
],
[
"V",
-2.8114993435449485
],
[
"T",
-2.818179108697013
],
[
"D",
-2.8358745908237744
],
[
"I",
-2.8386883982403575
],
[
"S",
-2.8508125229928503
],
[
"G",
-2.8876033679551227
],
[
"E",
-2.9024328890049524
],
[
"P",
-2.911018940638389
],
[
"K",
-2.9183231107580596
],
[
"A",
-2.919747614014888
],
[
"N",
-3.123051382634724
],
[
"F",
-3.17920066040503
],
[
"Q",
-3.190869184456883
],
[
"Y",
-3.4628954984309086
],
[
"H",
-3.741564333998106
],
[
"M",
-3.771740452244479
],
[
"C",
-4.177244477126914
],
[
"W",
-4.26244359647038
],
[
"AA",
-5.151553502459924
],
[
"LL",
-5.297524048938797
],
[
"LA",
-5.433632759818389
],
[
"AL",
-5.5585947635634465
],
[
"SS",
-5.653368570054353
],
[
"LS",
-5.6542024366077595
],
[
"SL",
-5.774350740282891
],
[
"AG",
-5.927206325143018
],
[
"VL",
-5.971670833218097
],
[
"GG",
-5.998372467775059
],
[
"VA",
-6.014387210129083
],
[
"SA",
-6.0238415835311105
],
[
"LV",
-6.027713726026638
],
[
"EL",
-6.053145724740345
],
[
"AV",
-6.058233627458359
],
[
"GA",
-6.083252372183383
],
[
"LG",
-6.1040977735585
],
[
"AS",
-6.118553954419422
],
[
"SG",
-6.132691839839374
],
[
"EA",
-6.152907367995331
],
[
"X",
-7.727541617091807
],
[
"B",
-14.572756361550985
],
[
"Z",
-15.799721233660796
],
[
"U",
-16.47917235225718
],
[
"O",
-19.974280430470184
]
]
}
}