Tomer
first commit
845829e
{
"version": "1.0",
"truncation": null,
"padding": {
"strategy": "BatchLongest",
"direction": "Right",
"pad_to_multiple_of": null,
"pad_id": 0,
"pad_type_id": 0,
"pad_token": "<pad>"
},
"added_tokens": [
{
"id": 0,
"content": "<pad>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "<gap>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "<s>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "</s>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 4,
"content": "<unk>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 5,
"content": "<cls>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 6,
"content": "<sep>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 7,
"content": "<expr_top10>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 8,
"content": "<expr_pre75_90>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 9,
"content": "<expr_pre50_75>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 10,
"content": "<expr_pre25_50>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 11,
"content": "<expr_low25>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 12,
"content": "<expr_unk>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 13,
"content": "<seqpos_begin>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 14,
"content": "<seqpos_end>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 15,
"content": "<seqpos_mid>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 16,
"content": "<sim_high>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 17,
"content": "<sim_low>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 18,
"content": "<S_cerevisiae>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 19,
"content": "<S_pombe>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 20,
"content": "<E_coli>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 21,
"content": "<B_subtilis>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 22,
"content": "<mask_*>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 23,
"content": "<mask_S>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 24,
"content": "<mask_W>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 25,
"content": "<mask_D>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 26,
"content": "<mask_N>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 27,
"content": "<mask_V>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 28,
"content": "<mask_H>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 29,
"content": "<mask_A>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 30,
"content": "<mask_K>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 31,
"content": "<mask_G>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 32,
"content": "<mask_Q>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 33,
"content": "<mask_F>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 34,
"content": "<mask_I>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 35,
"content": "<mask_T>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 36,
"content": "<mask_C>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 37,
"content": "<mask_L>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 38,
"content": "<mask_P>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 39,
"content": "<mask_E>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 40,
"content": "<mask_M>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 41,
"content": "<mask_Y>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 42,
"content": "<mask_R>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 43,
"content": "TTT",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 44,
"content": "TTC",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 45,
"content": "TTA",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 46,
"content": "TTG",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 47,
"content": "TCT",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 48,
"content": "TCC",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 49,
"content": "TCA",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 50,
"content": "TCG",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 51,
"content": "TAT",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 52,
"content": "TAC",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 53,
"content": "TAA",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 54,
"content": "TAG",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 55,
"content": "TGT",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 56,
"content": "TGC",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 57,
"content": "TGA",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 58,
"content": "TGG",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 59,
"content": "CTT",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 60,
"content": "CTC",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 61,
"content": "CTA",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 62,
"content": "CTG",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 63,
"content": "CCT",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 64,
"content": "CCC",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 65,
"content": "CCA",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 66,
"content": "CCG",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 67,
"content": "CAT",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 68,
"content": "CAC",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 69,
"content": "CAA",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 70,
"content": "CAG",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 71,
"content": "CGT",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 72,
"content": "CGC",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 73,
"content": "CGA",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 74,
"content": "CGG",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 75,
"content": "ATT",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 76,
"content": "ATC",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 77,
"content": "ATA",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 78,
"content": "ATG",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 79,
"content": "ACT",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 80,
"content": "ACC",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 81,
"content": "ACA",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 82,
"content": "ACG",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 83,
"content": "AAT",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 84,
"content": "AAC",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 85,
"content": "AAA",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 86,
"content": "AAG",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 87,
"content": "AGT",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 88,
"content": "AGC",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 89,
"content": "AGA",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 90,
"content": "AGG",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 91,
"content": "GTT",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 92,
"content": "GTC",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 93,
"content": "GTA",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 94,
"content": "GTG",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 95,
"content": "GCT",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 96,
"content": "GCC",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 97,
"content": "GCA",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 98,
"content": "GCG",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 99,
"content": "GAT",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 100,
"content": "GAC",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 101,
"content": "GAA",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 102,
"content": "GAG",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 103,
"content": "GGT",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 104,
"content": "GGC",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 105,
"content": "GGA",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 106,
"content": "GGG",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true,
"special": false
},
{
"id": 107,
"content": "<msk>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "Whitespace"
},
"post_processor": {
"type": "TemplateProcessing",
"single": [
{
"SpecialToken": {
"id": "<cls>",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "<sep>",
"type_id": 0
}
}
],
"pair": [
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"Sequence": {
"id": "B",
"type_id": 0
}
}
],
"special_tokens": {
"<cls>": {
"id": "<cls>",
"ids": [
5
],
"tokens": [
"<cls>"
]
},
"<sep>": {
"id": "<sep>",
"ids": [
6
],
"tokens": [
"<sep>"
]
}
}
},
"decoder": null,
"model": {
"type": "WordLevel",
"vocab": {},
"unk_token": "<unk>"
}
}