marian-finetuned-whitespace / tokenizer.json
GiGi2044's picture
Training in progress, epoch 1
0ceae02 verified
raw
history blame
No virus
2.3 kB
{
  "version": "1.0",
  "truncation": {
    "direction": "Right",
    "max_length": 128,
    "strategy": "LongestFirst",
    "stride": 0
  },
  "padding": {
    "strategy": {
      "Fixed": 128
    },
    "direction": "Right",
    "pad_to_multiple_of": null,
    "pad_id": 70,
    "pad_type_id": 0,
    "pad_token": "[PAD]"
  },
  "added_tokens": [
    {
      "id": 0,
      "content": "[UNK]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 1,
      "content": "<s>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 2,
      "content": "</s>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 70,
      "content": "[PAD]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    }
  ],
  "normalizer": null,
  "pre_tokenizer": {
    "type": "Whitespace"
  },
  "post_processor": null,
  "decoder": null,
  "model": {
    "type": "WordLevel",
    "vocab": {
      "[UNK]": 0,
      "<s>": 1,
      "</s>": 2,
      "\"": 3,
      "{{": 4,
      "\":\"": 5,
      ",": 6,
      "}": 7,
      "\":": 8,
      "name": 9,
      "{": 10,
      "\":{": 11,
      "id": 12,
      "ktl": 13,
      "pli": 14,
      "translation": 15,
      "}}": 16,
      "},": 17,
      "}}\",": 18,
      "value": 19,
      "}}\"": 20,
      ":": 21,
      "param0": 22,
      "type0": 23,
      "\":\"{{": 24,
      "PROCEDURE": 25,
      "fun": 26,
      "return": 27,
      "\",": 28,
      "CALL": 29,
      "type": 30,
      "type1": 31,
      "}}()\"": 32,
      "}}({{": 33,
      "}})\"": 34,
      "\":\"{\"": 35,
      "\":\"}\"": 36,
      "(": 37,
      "({{": 38,
      "0": 39,
      "1": 40,
      "10": 41,
      "2": 42,
      "3": 43,
      "4": 44,
      "5": 45,
      "6": 46,
      "7": 47,
      "8": 48,
      "9": 49,
      "<{{": 50,
      "=": 51,
      "ASSIGN": 52,
      "DECLARE": 53,
      "DO": 54,
      "END": 55,
      "IF": 56,
      "MAIN": 57,
      "RETURN": 58,
      "THEN": 59,
      "[": 60,
      "]": 61,
      "args": 62,
      "data": 63,
      "if": 64,
      "main": 65,
      "var": 66,
      "}})": 67,
      "}},{{": 68,
      "}}>)\"": 69,
      "[PAD]": 70
    },
    "unk_token": "[UNK]"
  }
}