biomed.omics.bl.sm.ma-ted-458m / tokenizer /t5_tokenizer_AA_special.json
SagiPolaczek's picture
Update tokenizer/t5_tokenizer_AA_special.json
d276f0d verified
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<UNK>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "<PAD>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "<CLS>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "<SEP>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 4,
"content": "<MASK>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 5,
"content": "<EOS>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 6,
"content": "<MOLECULAR_ENTITY>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 7,
"content": "<GLOBAL_INTERACTION_ATTRIBUTES>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 8,
"content": "<INTERNAL_0>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 9,
"content": "<MOLECULAR_ENTITY_EPITOPE>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 10,
"content": "<INTERNAL_2>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 11,
"content": "<INTERNAL_3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 12,
"content": "<MOLECULAR_ENTITY_TCR_ALPHA_CHAIN>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 13,
"content": "<MOLECULAR_ENTITY_TCR_BETA_VDJ>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 14,
"content": "<MOLECULAR_ENTITY_TCR_BETA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 15,
"content": "<BINDING_AFFINITY_CLASS>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 16,
"content": "<DECODER_START>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 17,
"content": "<BINDING>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 18,
"content": "<FILLIN>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 19,
"content": "<REORDER>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 20,
"content": "<TOAA>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 21,
"content": "<ACTIVE>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 22,
"content": "<GENESEQ>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 23,
"content": "<INCREASE>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 24,
"content": "<DECREASE>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 25,
"content": "<STRUCTURE>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 26,
"content": "<DISTANCE>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 27,
"content": "<SOLUBILITY>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 28,
"content": "<TOXICITY>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 29,
"content": "<AB>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 30,
"content": "<ISACTIVE>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 31,
"content": "<ISSYNTHETIC>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 32,
"content": "<PENETR>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 33,
"content": "<ABSORPTION>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 34,
"content": "<DISTRIBUTION>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 35,
"content": "<METABOLISM>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 36,
"content": "<EXCRETION>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 37,
"content": "<FLUORESCENCE>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 38,
"content": "<STABILITY>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 39,
"content": "<DISORDER>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 40,
"content": "<DISEASE>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 41,
"content": "<BINARY>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 42,
"content": "<REGRESSION>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 43,
"content": "<ORGANISM>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 44,
"content": "<0>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 45,
"content": "<1>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 46,
"content": "<2>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 47,
"content": "<3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 48,
"content": "<4>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 49,
"content": "<5>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 50,
"content": "<6>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 51,
"content": "<7>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 52,
"content": "<8>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 53,
"content": "<9>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 54,
"content": "<.>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 55,
"content": "<YES>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 56,
"content": "<NO>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 57,
"content": "<SENTINEL_ID_0>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 58,
"content": "<SENTINEL_ID_1>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 59,
"content": "<SENTINEL_ID_2>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 60,
"content": "<SENTINEL_ID_3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 61,
"content": "<SENTINEL_ID_4>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 62,
"content": "<SENTINEL_ID_5>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 63,
"content": "<SENTINEL_ID_6>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 64,
"content": "<SENTINEL_ID_7>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 65,
"content": "<SENTINEL_ID_8>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 66,
"content": "<SENTINEL_ID_9>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 67,
"content": "<SENTINEL_ID_10>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 68,
"content": "<SENTINEL_ID_11>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 69,
"content": "<SENTINEL_ID_12>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 70,
"content": "<SENTINEL_ID_13>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 71,
"content": "<SENTINEL_ID_14>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 72,
"content": "<SENTINEL_ID_15>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 73,
"content": "<SENTINEL_ID_16>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 74,
"content": "<SENTINEL_ID_17>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 75,
"content": "<SENTINEL_ID_18>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 76,
"content": "<SENTINEL_ID_19>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 77,
"content": "<SENTINEL_ID_20>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 78,
"content": "<SENTINEL_ID_21>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 79,
"content": "<SENTINEL_ID_22>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 80,
"content": "<SENTINEL_ID_23>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 81,
"content": "<SENTINEL_ID_24>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 82,
"content": "<SENTINEL_ID_25>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 83,
"content": "<SENTINEL_ID_26>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 84,
"content": "<SENTINEL_ID_27>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 85,
"content": "<SENTINEL_ID_28>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 86,
"content": "<SENTINEL_ID_29>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 87,
"content": "<SENTINEL_ID_30>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 88,
"content": "<SENTINEL_ID_31>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 89,
"content": "<SENTINEL_ID_32>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 90,
"content": "<SENTINEL_ID_33>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 91,
"content": "<SENTINEL_ID_34>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 92,
"content": "<SENTINEL_ID_35>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 93,
"content": "<SENTINEL_ID_36>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 94,
"content": "<SENTINEL_ID_37>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 95,
"content": "<SENTINEL_ID_38>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 96,
"content": "<SENTINEL_ID_39>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 97,
"content": "<SENTINEL_ID_40>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 98,
"content": "<SENTINEL_ID_41>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 99,
"content": "<SENTINEL_ID_42>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 100,
"content": "<SENTINEL_ID_43>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 101,
"content": "<SENTINEL_ID_44>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 102,
"content": "<SENTINEL_ID_45>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 103,
"content": "<SENTINEL_ID_46>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 104,
"content": "<SENTINEL_ID_47>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 105,
"content": "<SENTINEL_ID_48>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 106,
"content": "<SENTINEL_ID_49>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 107,
"content": "<SENTINEL_ID_50>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 108,
"content": "<SENTINEL_ID_51>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 109,
"content": "<SENTINEL_ID_52>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 110,
"content": "<SENTINEL_ID_53>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 111,
"content": "<SENTINEL_ID_54>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 112,
"content": "<SENTINEL_ID_55>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 113,
"content": "<SENTINEL_ID_56>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 114,
"content": "<SENTINEL_ID_57>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 115,
"content": "<SENTINEL_ID_58>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 116,
"content": "<SENTINEL_ID_59>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 117,
"content": "<SENTINEL_ID_60>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 118,
"content": "<SENTINEL_ID_61>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 119,
"content": "<SENTINEL_ID_62>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 120,
"content": "<SENTINEL_ID_63>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 121,
"content": "<SENTINEL_ID_64>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 122,
"content": "<SENTINEL_ID_65>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 123,
"content": "<SENTINEL_ID_66>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 124,
"content": "<SENTINEL_ID_67>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 125,
"content": "<SENTINEL_ID_68>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 126,
"content": "<SENTINEL_ID_69>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 127,
"content": "<SENTINEL_ID_70>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 128,
"content": "<SENTINEL_ID_71>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 129,
"content": "<SENTINEL_ID_72>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 130,
"content": "<SENTINEL_ID_73>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 131,
"content": "<SENTINEL_ID_74>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 132,
"content": "<SENTINEL_ID_75>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 133,
"content": "<SENTINEL_ID_76>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 134,
"content": "<SENTINEL_ID_77>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 135,
"content": "<SENTINEL_ID_78>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 136,
"content": "<SENTINEL_ID_79>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 137,
"content": "<SENTINEL_ID_80>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 138,
"content": "<SENTINEL_ID_81>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 139,
"content": "<SENTINEL_ID_82>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 140,
"content": "<SENTINEL_ID_83>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 141,
"content": "<SENTINEL_ID_84>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 142,
"content": "<SENTINEL_ID_85>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 143,
"content": "<SENTINEL_ID_86>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 144,
"content": "<SENTINEL_ID_87>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 145,
"content": "<SENTINEL_ID_88>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 146,
"content": "<SENTINEL_ID_89>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 147,
"content": "<SENTINEL_ID_90>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 148,
"content": "<SENTINEL_ID_91>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 149,
"content": "<SENTINEL_ID_92>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 150,
"content": "<SENTINEL_ID_93>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 151,
"content": "<SENTINEL_ID_94>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 152,
"content": "<SENTINEL_ID_95>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 153,
"content": "<SENTINEL_ID_96>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 154,
"content": "<SENTINEL_ID_97>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 155,
"content": "<SENTINEL_ID_98>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 156,
"content": "<SENTINEL_ID_99>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 157,
"content": "<SENTINEL_ID_100>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 158,
"content": "<SENTINEL_ID_101>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 159,
"content": "<SENTINEL_ID_102>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 160,
"content": "<SENTINEL_ID_103>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 161,
"content": "<SENTINEL_ID_104>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 162,
"content": "<SENTINEL_ID_105>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 163,
"content": "<SENTINEL_ID_106>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 164,
"content": "<SENTINEL_ID_107>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 165,
"content": "<SENTINEL_ID_108>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 166,
"content": "<SENTINEL_ID_109>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 167,
"content": "<SENTINEL_ID_110>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 168,
"content": "<SENTINEL_ID_111>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 169,
"content": "<SENTINEL_ID_112>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 170,
"content": "<SENTINEL_ID_113>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 171,
"content": "<SENTINEL_ID_114>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 172,
"content": "<SENTINEL_ID_115>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 173,
"content": "<SENTINEL_ID_116>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 174,
"content": "<SENTINEL_ID_117>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 175,
"content": "<SENTINEL_ID_118>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 176,
"content": "<SENTINEL_ID_119>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 177,
"content": "<SENTINEL_ID_120>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 178,
"content": "<SENTINEL_ID_121>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 179,
"content": "<SENTINEL_ID_122>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 180,
"content": "<SENTINEL_ID_123>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 181,
"content": "<SENTINEL_ID_124>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 182,
"content": "<SENTINEL_ID_125>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 183,
"content": "<SENTINEL_ID_126>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 184,
"content": "<SENTINEL_ID_127>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 185,
"content": "<SENTINEL_ID_128>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 186,
"content": "<SENTINEL_ID_129>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 187,
"content": "<SENTINEL_ID_130>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 188,
"content": "<SENTINEL_ID_131>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 189,
"content": "<SENTINEL_ID_132>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 190,
"content": "<SENTINEL_ID_133>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 191,
"content": "<SENTINEL_ID_134>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 192,
"content": "<SENTINEL_ID_135>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 193,
"content": "<SENTINEL_ID_136>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 194,
"content": "<SENTINEL_ID_137>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 195,
"content": "<SENTINEL_ID_138>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 196,
"content": "<SENTINEL_ID_139>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 197,
"content": "<SENTINEL_ID_140>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 198,
"content": "<SENTINEL_ID_141>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 199,
"content": "<SENTINEL_ID_142>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 200,
"content": "<SENTINEL_ID_143>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 201,
"content": "<SENTINEL_ID_144>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 202,
"content": "<SENTINEL_ID_145>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 203,
"content": "<SENTINEL_ID_146>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 204,
"content": "<SENTINEL_ID_147>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 205,
"content": "<SENTINEL_ID_148>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 206,
"content": "<SENTINEL_ID_149>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 207,
"content": "<SENTINEL_ID_150>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 208,
"content": "<SENTINEL_ID_151>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 209,
"content": "<SENTINEL_ID_152>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 210,
"content": "<SENTINEL_ID_153>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 211,
"content": "<SENTINEL_ID_154>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 212,
"content": "<SENTINEL_ID_155>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 213,
"content": "<SENTINEL_ID_156>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 214,
"content": "<SENTINEL_ID_157>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 215,
"content": "<SENTINEL_ID_158>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 216,
"content": "<SENTINEL_ID_159>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 217,
"content": "<SENTINEL_ID_160>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 218,
"content": "<SENTINEL_ID_161>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 219,
"content": "<SENTINEL_ID_162>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 220,
"content": "<SENTINEL_ID_163>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 221,
"content": "<SENTINEL_ID_164>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 222,
"content": "<SENTINEL_ID_165>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 223,
"content": "<SENTINEL_ID_166>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 224,
"content": "<SENTINEL_ID_167>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 225,
"content": "<SENTINEL_ID_168>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 226,
"content": "<SENTINEL_ID_169>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 227,
"content": "<SENTINEL_ID_170>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 228,
"content": "<SENTINEL_ID_171>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 229,
"content": "<SENTINEL_ID_172>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 230,
"content": "<SENTINEL_ID_173>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 231,
"content": "<SENTINEL_ID_174>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 232,
"content": "<SENTINEL_ID_175>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 233,
"content": "<SENTINEL_ID_176>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 234,
"content": "<SENTINEL_ID_177>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 235,
"content": "<SENTINEL_ID_178>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 236,
"content": "<SENTINEL_ID_179>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 237,
"content": "<SENTINEL_ID_180>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 238,
"content": "<SENTINEL_ID_181>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 239,
"content": "<SENTINEL_ID_182>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 240,
"content": "<SENTINEL_ID_183>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 241,
"content": "<SENTINEL_ID_184>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 242,
"content": "<SENTINEL_ID_185>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 243,
"content": "<SENTINEL_ID_186>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 244,
"content": "<SENTINEL_ID_187>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 245,
"content": "<SENTINEL_ID_188>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 246,
"content": "<SENTINEL_ID_189>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 247,
"content": "<SENTINEL_ID_190>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 248,
"content": "<SENTINEL_ID_191>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 249,
"content": "<SENTINEL_ID_192>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 250,
"content": "<SENTINEL_ID_193>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 251,
"content": "<SENTINEL_ID_194>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 252,
"content": "<SENTINEL_ID_195>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 253,
"content": "<SENTINEL_ID_196>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 254,
"content": "<SENTINEL_ID_197>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 255,
"content": "<SENTINEL_ID_198>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 256,
"content": "<SENTINEL_ID_199>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 257,
"content": "<INTERNAL_17>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 258,
"content": "<INTERNAL_15>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 259,
"content": "<INTERNAL_16>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 260,
"content": "<ATTRIBUTE_ORGANISM>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 261,
"content": "<ATTRIBUTE_ORGANISM_HUMAN>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 262,
"content": "<ATTRIBUTE_ORGANISM_RABBIT>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 263,
"content": "<ATTRIBUTE_ORGANISM_RAT>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 264,
"content": "<ATTRIBUTE_ORGANISM_MOUSE>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 265,
"content": "<ATTRIBUTE_ORGANISM_MONKEY>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 266,
"content": "<ATTRIBUTE_ORGANISM_CAMEL>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 267,
"content": "<EPITOPE_PARATOPE_PREDICTION>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 268,
"content": "<INTERNAL_7>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 269,
"content": "<INTERNAL_6>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 270,
"content": "<INTERNAL_9>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 271,
"content": "<INTERNAL_5>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 272,
"content": "<INTERNAL_8>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 273,
"content": "<INTERNAL_4>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 274,
"content": "<MOLECULAR_ENTITY_GENERAL_PROTEIN>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 275,
"content": "<TIMESTEP>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 276,
"content": "<DIFFUSION>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 277,
"content": "<SEQUENCE_NATURAL_END>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 278,
"content": "<SMILES_SEQUENCE>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 279,
"content": "<SELFIES_SEQUENCE>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 280,
"content": "<AMINO_ACID_SEQUENCE>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 281,
"content": "<GENERAL_AFFINITY_CLASS>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 282,
"content": "<BACKSPACE>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 283,
"content": "<SEQUENCE_NATURAL_START>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 284,
"content": "<NOOP>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 285,
"content": "<INTERNAL_14>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 286,
"content": "<MOLECULAR_ENTITY_SMALL_MOLECULE>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 287,
"content": "<MOLECULAR_ENTITY_CELL_GENE_EXPRESSION_RANKED>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 288,
"content": "<CELL_TYPE_CLASS>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 289,
"content": "<TISSUE_TYPE_CLASS>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 290,
"content": "<CORRUPTED_AREA_START>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 291,
"content": "<CORRUPTED_AREA_END>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 292,
"content": "<MOLECULAR_ENTITY_MUTATED_PROTEIN_CHAIN>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 293,
"content": "<MOLECULAR_ENTITY_PROTEIN_CHAIN>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 294,
"content": "<COMPLEX_ENTITY>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 295,
"content": "<ALTERNATIVE>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 296,
"content": "<INTERNAL_13>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 297,
"content": "<INTERNAL_12>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 298,
"content": "<SUBMOLECULAR_ENTITY>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 299,
"content": "<MUTATED>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 300,
"content": "<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 301,
"content": "<MOLECULAR_ENTITY_TCR_DELTA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 302,
"content": "<MOLECULAR_ENTITY_TCR_DELTA_VAR>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 303,
"content": "<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 304,
"content": "<MOLECULAR_ENTITY_TCR_GAMMA_VAR>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 305,
"content": "<SCALAR>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 306,
"content": "<VECTOR>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 307,
"content": "<MASKED_SCALAR>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 308,
"content": "<MASKED_VECTOR>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 309,
"content": "<AUTOENCODER_LATENT_LOG_VARIANCE>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 310,
"content": "<AUTOENCODER_LATENT_MEAN>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 311,
"content": "<AUTOENCODER_LATENT_SAMPLED_Z>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 312,
"content": "<AUTOENCODER_TASK>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 313,
"content": "<DECODED_FROM_LATENT>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "Sequence",
"pretokenizers": [
{
"type": "Split",
"pattern": {
"Regex": "<.*?>|\\S"
},
"behavior": "Removed",
"invert": true
}
]
},
"post_processor": null,
"decoder": null,
"model": {
"type": "WordLevel",
"vocab": {
"<UNK>": 0,
"<PAD>": 1,
"<CLS>": 2,
"<SEP>": 3,
"<MASK>": 4,
"<EOS>": 5,
"<MOLECULAR_ENTITY>": 6,
"<GLOBAL_INTERACTION_ATTRIBUTES>": 7,
"<INTERNAL_0>": 8,
"<MOLECULAR_ENTITY_EPITOPE>": 9,
"<INTERNAL_2>": 10,
"<INTERNAL_3>": 11,
"<MOLECULAR_ENTITY_TCR_ALPHA_CHAIN>": 12,
"<MOLECULAR_ENTITY_TCR_BETA_VDJ>": 13,
"<MOLECULAR_ENTITY_TCR_BETA_CDR3>": 14,
"<BINDING_AFFINITY_CLASS>": 15,
"<DECODER_START>": 16,
"<BINDING>": 17,
"<FILLIN>": 18,
"<REORDER>": 19,
"<TOAA>": 20,
"<ACTIVE>": 21,
"<GENESEQ>": 22,
"<INCREASE>": 23,
"<DECREASE>": 24,
"<STRUCTURE>": 25,
"<DISTANCE>": 26,
"<SOLUBILITY>": 27,
"<TOXICITY>": 28,
"<AB>": 29,
"<ISACTIVE>": 30,
"<ISSYNTHETIC>": 31,
"<PENETR>": 32,
"<ABSORPTION>": 33,
"<DISTRIBUTION>": 34,
"<METABOLISM>": 35,
"<EXCRETION>": 36,
"<FLUORESCENCE>": 37,
"<STABILITY>": 38,
"<DISORDER>": 39,
"<DISEASE>": 40,
"<BINARY>": 41,
"<REGRESSION>": 42,
"<ORGANISM>": 43,
"<0>": 44,
"<1>": 45,
"<2>": 46,
"<3>": 47,
"<4>": 48,
"<5>": 49,
"<6>": 50,
"<7>": 51,
"<8>": 52,
"<9>": 53,
"<.>": 54,
"<YES>": 55,
"<NO>": 56,
"<SENTINEL_ID_0>": 57,
"<SENTINEL_ID_1>": 58,
"<SENTINEL_ID_2>": 59,
"<SENTINEL_ID_3>": 60,
"<SENTINEL_ID_4>": 61,
"<SENTINEL_ID_5>": 62,
"<SENTINEL_ID_6>": 63,
"<SENTINEL_ID_7>": 64,
"<SENTINEL_ID_8>": 65,
"<SENTINEL_ID_9>": 66,
"<SENTINEL_ID_10>": 67,
"<SENTINEL_ID_11>": 68,
"<SENTINEL_ID_12>": 69,
"<SENTINEL_ID_13>": 70,
"<SENTINEL_ID_14>": 71,
"<SENTINEL_ID_15>": 72,
"<SENTINEL_ID_16>": 73,
"<SENTINEL_ID_17>": 74,
"<SENTINEL_ID_18>": 75,
"<SENTINEL_ID_19>": 76,
"<SENTINEL_ID_20>": 77,
"<SENTINEL_ID_21>": 78,
"<SENTINEL_ID_22>": 79,
"<SENTINEL_ID_23>": 80,
"<SENTINEL_ID_24>": 81,
"<SENTINEL_ID_25>": 82,
"<SENTINEL_ID_26>": 83,
"<SENTINEL_ID_27>": 84,
"<SENTINEL_ID_28>": 85,
"<SENTINEL_ID_29>": 86,
"<SENTINEL_ID_30>": 87,
"<SENTINEL_ID_31>": 88,
"<SENTINEL_ID_32>": 89,
"<SENTINEL_ID_33>": 90,
"<SENTINEL_ID_34>": 91,
"<SENTINEL_ID_35>": 92,
"<SENTINEL_ID_36>": 93,
"<SENTINEL_ID_37>": 94,
"<SENTINEL_ID_38>": 95,
"<SENTINEL_ID_39>": 96,
"<SENTINEL_ID_40>": 97,
"<SENTINEL_ID_41>": 98,
"<SENTINEL_ID_42>": 99,
"<SENTINEL_ID_43>": 100,
"<SENTINEL_ID_44>": 101,
"<SENTINEL_ID_45>": 102,
"<SENTINEL_ID_46>": 103,
"<SENTINEL_ID_47>": 104,
"<SENTINEL_ID_48>": 105,
"<SENTINEL_ID_49>": 106,
"<SENTINEL_ID_50>": 107,
"<SENTINEL_ID_51>": 108,
"<SENTINEL_ID_52>": 109,
"<SENTINEL_ID_53>": 110,
"<SENTINEL_ID_54>": 111,
"<SENTINEL_ID_55>": 112,
"<SENTINEL_ID_56>": 113,
"<SENTINEL_ID_57>": 114,
"<SENTINEL_ID_58>": 115,
"<SENTINEL_ID_59>": 116,
"<SENTINEL_ID_60>": 117,
"<SENTINEL_ID_61>": 118,
"<SENTINEL_ID_62>": 119,
"<SENTINEL_ID_63>": 120,
"<SENTINEL_ID_64>": 121,
"<SENTINEL_ID_65>": 122,
"<SENTINEL_ID_66>": 123,
"<SENTINEL_ID_67>": 124,
"<SENTINEL_ID_68>": 125,
"<SENTINEL_ID_69>": 126,
"<SENTINEL_ID_70>": 127,
"<SENTINEL_ID_71>": 128,
"<SENTINEL_ID_72>": 129,
"<SENTINEL_ID_73>": 130,
"<SENTINEL_ID_74>": 131,
"<SENTINEL_ID_75>": 132,
"<SENTINEL_ID_76>": 133,
"<SENTINEL_ID_77>": 134,
"<SENTINEL_ID_78>": 135,
"<SENTINEL_ID_79>": 136,
"<SENTINEL_ID_80>": 137,
"<SENTINEL_ID_81>": 138,
"<SENTINEL_ID_82>": 139,
"<SENTINEL_ID_83>": 140,
"<SENTINEL_ID_84>": 141,
"<SENTINEL_ID_85>": 142,
"<SENTINEL_ID_86>": 143,
"<SENTINEL_ID_87>": 144,
"<SENTINEL_ID_88>": 145,
"<SENTINEL_ID_89>": 146,
"<SENTINEL_ID_90>": 147,
"<SENTINEL_ID_91>": 148,
"<SENTINEL_ID_92>": 149,
"<SENTINEL_ID_93>": 150,
"<SENTINEL_ID_94>": 151,
"<SENTINEL_ID_95>": 152,
"<SENTINEL_ID_96>": 153,
"<SENTINEL_ID_97>": 154,
"<SENTINEL_ID_98>": 155,
"<SENTINEL_ID_99>": 156,
"<SENTINEL_ID_100>": 157,
"<SENTINEL_ID_101>": 158,
"<SENTINEL_ID_102>": 159,
"<SENTINEL_ID_103>": 160,
"<SENTINEL_ID_104>": 161,
"<SENTINEL_ID_105>": 162,
"<SENTINEL_ID_106>": 163,
"<SENTINEL_ID_107>": 164,
"<SENTINEL_ID_108>": 165,
"<SENTINEL_ID_109>": 166,
"<SENTINEL_ID_110>": 167,
"<SENTINEL_ID_111>": 168,
"<SENTINEL_ID_112>": 169,
"<SENTINEL_ID_113>": 170,
"<SENTINEL_ID_114>": 171,
"<SENTINEL_ID_115>": 172,
"<SENTINEL_ID_116>": 173,
"<SENTINEL_ID_117>": 174,
"<SENTINEL_ID_118>": 175,
"<SENTINEL_ID_119>": 176,
"<SENTINEL_ID_120>": 177,
"<SENTINEL_ID_121>": 178,
"<SENTINEL_ID_122>": 179,
"<SENTINEL_ID_123>": 180,
"<SENTINEL_ID_124>": 181,
"<SENTINEL_ID_125>": 182,
"<SENTINEL_ID_126>": 183,
"<SENTINEL_ID_127>": 184,
"<SENTINEL_ID_128>": 185,
"<SENTINEL_ID_129>": 186,
"<SENTINEL_ID_130>": 187,
"<SENTINEL_ID_131>": 188,
"<SENTINEL_ID_132>": 189,
"<SENTINEL_ID_133>": 190,
"<SENTINEL_ID_134>": 191,
"<SENTINEL_ID_135>": 192,
"<SENTINEL_ID_136>": 193,
"<SENTINEL_ID_137>": 194,
"<SENTINEL_ID_138>": 195,
"<SENTINEL_ID_139>": 196,
"<SENTINEL_ID_140>": 197,
"<SENTINEL_ID_141>": 198,
"<SENTINEL_ID_142>": 199,
"<SENTINEL_ID_143>": 200,
"<SENTINEL_ID_144>": 201,
"<SENTINEL_ID_145>": 202,
"<SENTINEL_ID_146>": 203,
"<SENTINEL_ID_147>": 204,
"<SENTINEL_ID_148>": 205,
"<SENTINEL_ID_149>": 206,
"<SENTINEL_ID_150>": 207,
"<SENTINEL_ID_151>": 208,
"<SENTINEL_ID_152>": 209,
"<SENTINEL_ID_153>": 210,
"<SENTINEL_ID_154>": 211,
"<SENTINEL_ID_155>": 212,
"<SENTINEL_ID_156>": 213,
"<SENTINEL_ID_157>": 214,
"<SENTINEL_ID_158>": 215,
"<SENTINEL_ID_159>": 216,
"<SENTINEL_ID_160>": 217,
"<SENTINEL_ID_161>": 218,
"<SENTINEL_ID_162>": 219,
"<SENTINEL_ID_163>": 220,
"<SENTINEL_ID_164>": 221,
"<SENTINEL_ID_165>": 222,
"<SENTINEL_ID_166>": 223,
"<SENTINEL_ID_167>": 224,
"<SENTINEL_ID_168>": 225,
"<SENTINEL_ID_169>": 226,
"<SENTINEL_ID_170>": 227,
"<SENTINEL_ID_171>": 228,
"<SENTINEL_ID_172>": 229,
"<SENTINEL_ID_173>": 230,
"<SENTINEL_ID_174>": 231,
"<SENTINEL_ID_175>": 232,
"<SENTINEL_ID_176>": 233,
"<SENTINEL_ID_177>": 234,
"<SENTINEL_ID_178>": 235,
"<SENTINEL_ID_179>": 236,
"<SENTINEL_ID_180>": 237,
"<SENTINEL_ID_181>": 238,
"<SENTINEL_ID_182>": 239,
"<SENTINEL_ID_183>": 240,
"<SENTINEL_ID_184>": 241,
"<SENTINEL_ID_185>": 242,
"<SENTINEL_ID_186>": 243,
"<SENTINEL_ID_187>": 244,
"<SENTINEL_ID_188>": 245,
"<SENTINEL_ID_189>": 246,
"<SENTINEL_ID_190>": 247,
"<SENTINEL_ID_191>": 248,
"<SENTINEL_ID_192>": 249,
"<SENTINEL_ID_193>": 250,
"<SENTINEL_ID_194>": 251,
"<SENTINEL_ID_195>": 252,
"<SENTINEL_ID_196>": 253,
"<SENTINEL_ID_197>": 254,
"<SENTINEL_ID_198>": 255,
"<SENTINEL_ID_199>": 256,
"<INTERNAL_17>": 257,
"<INTERNAL_15>": 258,
"<INTERNAL_16>": 259,
"<ATTRIBUTE_ORGANISM>": 260,
"<ATTRIBUTE_ORGANISM_HUMAN>": 261,
"<ATTRIBUTE_ORGANISM_RABBIT>": 262,
"<ATTRIBUTE_ORGANISM_RAT>": 263,
"<ATTRIBUTE_ORGANISM_MOUSE>": 264,
"<ATTRIBUTE_ORGANISM_MONKEY>": 265,
"<ATTRIBUTE_ORGANISM_CAMEL>": 266,
"<EPITOPE_PARATOPE_PREDICTION>": 267,
"<INTERNAL_7>": 268,
"<INTERNAL_6>": 269,
"<INTERNAL_9>": 270,
"<INTERNAL_5>": 271,
"<INTERNAL_8>": 272,
"<INTERNAL_4>": 273,
"<MOLECULAR_ENTITY_GENERAL_PROTEIN>": 274,
"<TIMESTEP>": 275,
"<DIFFUSION>": 276,
"<SEQUENCE_NATURAL_END>": 277,
"<SMILES_SEQUENCE>": 278,
"<SELFIES_SEQUENCE>": 279,
"<AMINO_ACID_SEQUENCE>": 280,
"<GENERAL_AFFINITY_CLASS>": 281,
"<BACKSPACE>": 282,
"<SEQUENCE_NATURAL_START>": 283,
"<NOOP>": 284,
"<INTERNAL_14>": 285,
"<MOLECULAR_ENTITY_SMALL_MOLECULE>": 286,
"<MOLECULAR_ENTITY_CELL_GENE_EXPRESSION_RANKED>": 287,
"<CELL_TYPE_CLASS>": 288,
"<TISSUE_TYPE_CLASS>": 289,
"<CORRUPTED_AREA_START>": 290,
"<CORRUPTED_AREA_END>": 291,
"<MOLECULAR_ENTITY_MUTATED_PROTEIN_CHAIN>": 292,
"<MOLECULAR_ENTITY_PROTEIN_CHAIN>": 293,
"<COMPLEX_ENTITY>": 294,
"<ALTERNATIVE>": 295,
"<INTERNAL_13>": 296,
"<INTERNAL_12>": 297,
"<SUBMOLECULAR_ENTITY>": 298,
"<MUTATED>": 299,
"<MOLECULAR_ENTITY_TCR_ALPHA_CDR3>": 300,
"<MOLECULAR_ENTITY_TCR_DELTA_CDR3>": 301,
"<MOLECULAR_ENTITY_TCR_DELTA_VAR>": 302,
"<MOLECULAR_ENTITY_TCR_GAMMA_CDR3>": 303,
"<MOLECULAR_ENTITY_TCR_GAMMA_VAR>": 304,
"<SCALAR>": 305,
"<VECTOR>": 306,
"<MASKED_SCALAR>": 307,
"<MASKED_VECTOR>": 308,
"<AUTOENCODER_LATENT_LOG_VARIANCE>": 309,
"<AUTOENCODER_LATENT_MEAN>": 310,
"<AUTOENCODER_LATENT_SAMPLED_Z>": 311,
"<AUTOENCODER_TASK>": 312,
"<DECODED_FROM_LATENT>": 313,
"A": 501,
"B": 502,
"C": 503,
"D": 504,
"E": 505,
"F": 506,
"G": 507,
"H": 508,
"I": 509,
"K": 510,
"L": 511,
"M": 512,
"N": 513,
"O": 514,
"P": 515,
"Q": 516,
"R": 517,
"S": 518,
"T": 519,
"U": 520,
"V": 521,
"W": 522,
"X": 523,
"Y": 524,
"Z": 525,
":": 526
},
"unk_token": "<UNK>"
}
}