PubChem-10m-t5-v2 / tokenizer.json
sagawa's picture
Upload 6 files
5359673
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<pad>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "</s>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "<unk>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 66,
"content": "<extra_id_0>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 67,
"content": "<extra_id_1>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 68,
"content": "<extra_id_2>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 69,
"content": "<extra_id_3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 70,
"content": "<extra_id_4>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 71,
"content": "<extra_id_5>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 72,
"content": "<extra_id_6>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 73,
"content": "<extra_id_7>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 74,
"content": "<extra_id_8>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 75,
"content": "<extra_id_9>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 76,
"content": "<extra_id_10>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 77,
"content": "<extra_id_11>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 78,
"content": "<extra_id_12>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 79,
"content": "<extra_id_13>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 80,
"content": "<extra_id_14>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 81,
"content": "<extra_id_15>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 82,
"content": "<extra_id_16>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 83,
"content": "<extra_id_17>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 84,
"content": "<extra_id_18>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 85,
"content": "<extra_id_19>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 86,
"content": "<extra_id_20>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 87,
"content": "<extra_id_21>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 88,
"content": "<extra_id_22>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 89,
"content": "<extra_id_23>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 90,
"content": "<extra_id_24>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 91,
"content": "<extra_id_25>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 92,
"content": "<extra_id_26>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 93,
"content": "<extra_id_27>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 94,
"content": "<extra_id_28>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 95,
"content": "<extra_id_29>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 96,
"content": "<extra_id_30>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 97,
"content": "<extra_id_31>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 98,
"content": "<extra_id_32>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 99,
"content": "<extra_id_33>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 100,
"content": "<extra_id_34>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 101,
"content": "<extra_id_35>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 102,
"content": "<extra_id_36>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 103,
"content": "<extra_id_37>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 104,
"content": "<extra_id_38>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 105,
"content": "<extra_id_39>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 106,
"content": "<extra_id_40>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 107,
"content": "<extra_id_41>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 108,
"content": "<extra_id_42>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 109,
"content": "<extra_id_43>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 110,
"content": "<extra_id_44>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 111,
"content": "<extra_id_45>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 112,
"content": "<extra_id_46>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 113,
"content": "<extra_id_47>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 114,
"content": "<extra_id_48>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 115,
"content": "<extra_id_49>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 116,
"content": "<extra_id_50>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 117,
"content": "<extra_id_51>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 118,
"content": "<extra_id_52>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 119,
"content": "<extra_id_53>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 120,
"content": "<extra_id_54>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 121,
"content": "<extra_id_55>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 122,
"content": "<extra_id_56>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 123,
"content": "<extra_id_57>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 124,
"content": "<extra_id_58>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 125,
"content": "<extra_id_59>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 126,
"content": "<extra_id_60>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 127,
"content": "<extra_id_61>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 128,
"content": "<extra_id_62>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 129,
"content": "<extra_id_63>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 130,
"content": "<extra_id_64>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 131,
"content": "<extra_id_65>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 132,
"content": "<extra_id_66>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 133,
"content": "<extra_id_67>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 134,
"content": "<extra_id_68>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 135,
"content": "<extra_id_69>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 136,
"content": "<extra_id_70>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 137,
"content": "<extra_id_71>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 138,
"content": "<extra_id_72>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 139,
"content": "<extra_id_73>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 140,
"content": "<extra_id_74>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 141,
"content": "<extra_id_75>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 142,
"content": "<extra_id_76>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 143,
"content": "<extra_id_77>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 144,
"content": "<extra_id_78>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 145,
"content": "<extra_id_79>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 146,
"content": "<extra_id_80>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 147,
"content": "<extra_id_81>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 148,
"content": "<extra_id_82>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 149,
"content": "<extra_id_83>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 150,
"content": "<extra_id_84>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 151,
"content": "<extra_id_85>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 152,
"content": "<extra_id_86>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 153,
"content": "<extra_id_87>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 154,
"content": "<extra_id_88>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 155,
"content": "<extra_id_89>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 156,
"content": "<extra_id_90>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 157,
"content": "<extra_id_91>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 158,
"content": "<extra_id_92>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 159,
"content": "<extra_id_93>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 160,
"content": "<extra_id_94>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 161,
"content": "<extra_id_95>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 162,
"content": "<extra_id_96>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 163,
"content": "<extra_id_97>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 164,
"content": "<extra_id_98>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 165,
"content": "<extra_id_99>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": {
"type": "Sequence",
"normalizers": [
{
"type": "Nmt"
},
{
"type": "NFKC"
},
{
"type": "Replace",
"pattern": {
"Regex": " {2,}"
},
"content": " "
}
]
},
"pre_tokenizer": {
"type": "Sequence",
"pretokenizers": [
{
"type": "Metaspace",
"replacement": "▁",
"add_prefix_space": true
},
{
"type": "Digits",
"individual_digits": true
},
{
"type": "Punctuation",
"behavior": "Isolated"
}
]
},
"post_processor": {
"type": "TemplateProcessing",
"single": [
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "</s>",
"type_id": 0
}
}
],
"pair": [
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"Sequence": {
"id": "B",
"type_id": 1
}
}
],
"special_tokens": {
"</s>": {
"id": "</s>",
"ids": [
1
],
"tokens": [
"</s>"
]
}
}
},
"decoder": {
"type": "Metaspace",
"replacement": "▁",
"add_prefix_space": true
},
"model": {
"type": "Unigram",
"unk_id": 2,
"vocab": [
[
"<pad>",
0.0
],
[
"</s>",
0.0
],
[
"<unk>",
0.0
],
[
"▁",
-0.6931471811208318
],
[
"c",
-2.220290809829148
],
[
"C",
-2.2673690803860964
],
[
")",
-2.9981361170080127
],
[
"(",
-2.9981361170080127
],
[
"1",
-3.4844890198102156
],
[
"O",
-3.5511292076377536
],
[
"2",
-3.863252526882995
],
[
"N",
-3.954480326557153
],
[
"=",
-3.9639438163153393
],
[
"n",
-4.49249510621453
],
[
"3",
-4.706416340241001
],
[
"]",
-4.861383518381034
],
[
"[",
-4.861383518381034
],
[
"H",
-5.301586812349004
],
[
"+",
-5.3673934007416815
],
[
"F",
-5.4124559289081144
],
[
"-",
-5.423110161151882
],
[
"S",
-5.8215121558686835
],
[
"4",
-5.9380633677755785
],
[
"l",
-5.992632624740358
],
[
"s",
-6.538637721159365
],
[
"B",
-6.863075351404065
],
[
"o",
-6.897179397445873
],
[
"r",
-6.910694347401536
],
[
"5",
-7.123649658940504
],
[
"#",
-7.147919182580168
],
[
"6",
-8.065038575838681
],
[
"i",
-8.687375580014221
],
[
"P",
-8.71389638203985
],
[
"I",
-8.797484341222447
],
[
"7",
-8.856288399584308
],
[
"8",
-9.60822991649292
],
[
"%",
-9.830908073412212
],
[
"9",
-10.315614217995469
],
[
"0",
-10.929791641751478
],
[
"e",
-11.284192796317075
],
[
"A",
-12.579082839534651
],
[
"p",
-13.066934980384708
],
[
"G",
-13.257515536986528
],
[
"T",
-13.400122608746443
],
[
"b",
-14.060850205484396
],
[
"g",
-14.0738373987575
],
[
"W",
-14.763359812563314
],
[
"a",
-15.212454252958995
],
[
"t",
-15.41262074816509
],
[
"R",
-15.55529509363809
],
[
"u",
-15.62132543499004
],
[
"V",
-16.125344933044865
],
[
"M",
-16.324758316156874
],
[
"Z",
-16.522364723441015
],
[
"h",
-17.331148753511044
],
[
"d",
-17.867339476497104
],
[
"X",
-17.93400614316378
],
[
"U",
-18.08235779151549
],
[
"f",
-18.165691124848927
],
[
"Y",
-18.165691124848927
],
[
"K",
-18.592711326881748
],
[
"L",
-18.735568469778855
],
[
"m",
-18.90223513644552
],
[
"E",
-18.90223513644552
],
[
"y",
-21.185568469778858
],
[
"D",
-21.185568469778858
]
]
}
}