codereviewer / tokenizer_config.json
josephj6802's picture
Init commit
99750fb
raw
history blame contribute delete
No virus
21.2 kB
{
"add_prefix_space": false,
"added_tokens_decoder": {
"0": {
"content": "<pad>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<s>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "</s>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": true
},
"3": {
"content": "<unk>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": true
},
"4": {
"content": "<mask>",
"lstrip": true,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": true
},
"32100": {
"content": "<keep>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32101": {
"content": "<add>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32102": {
"content": "<del>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32103": {
"content": "<start>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32104": {
"content": "<end>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32105": {
"content": "<e99>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32106": {
"content": "<e98>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32107": {
"content": "<e97>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32108": {
"content": "<e96>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32109": {
"content": "<e95>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32110": {
"content": "<e94>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32111": {
"content": "<e93>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32112": {
"content": "<e92>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32113": {
"content": "<e91>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32114": {
"content": "<e90>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32115": {
"content": "<e89>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32116": {
"content": "<e88>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32117": {
"content": "<e87>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32118": {
"content": "<e86>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32119": {
"content": "<e85>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32120": {
"content": "<e84>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32121": {
"content": "<e83>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32122": {
"content": "<e82>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32123": {
"content": "<e81>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32124": {
"content": "<e80>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32125": {
"content": "<e79>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32126": {
"content": "<e78>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32127": {
"content": "<e77>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32128": {
"content": "<e76>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32129": {
"content": "<e75>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32130": {
"content": "<e74>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32131": {
"content": "<e73>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32132": {
"content": "<e72>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32133": {
"content": "<e71>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32134": {
"content": "<e70>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32135": {
"content": "<e69>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32136": {
"content": "<e68>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32137": {
"content": "<e67>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32138": {
"content": "<e66>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32139": {
"content": "<e65>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32140": {
"content": "<e64>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32141": {
"content": "<e63>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32142": {
"content": "<e62>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32143": {
"content": "<e61>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32144": {
"content": "<e60>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32145": {
"content": "<e59>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32146": {
"content": "<e58>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32147": {
"content": "<e57>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32148": {
"content": "<e56>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32149": {
"content": "<e55>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32150": {
"content": "<e54>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32151": {
"content": "<e53>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32152": {
"content": "<e52>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32153": {
"content": "<e51>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32154": {
"content": "<e50>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32155": {
"content": "<e49>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32156": {
"content": "<e48>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32157": {
"content": "<e47>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32158": {
"content": "<e46>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32159": {
"content": "<e45>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32160": {
"content": "<e44>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32161": {
"content": "<e43>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32162": {
"content": "<e42>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32163": {
"content": "<e41>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32164": {
"content": "<e40>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32165": {
"content": "<e39>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32166": {
"content": "<e38>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32167": {
"content": "<e37>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32168": {
"content": "<e36>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32169": {
"content": "<e35>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32170": {
"content": "<e34>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32171": {
"content": "<e33>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32172": {
"content": "<e32>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32173": {
"content": "<e31>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32174": {
"content": "<e30>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32175": {
"content": "<e29>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32176": {
"content": "<e28>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32177": {
"content": "<e27>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32178": {
"content": "<e26>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32179": {
"content": "<e25>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32180": {
"content": "<e24>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32181": {
"content": "<e23>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32182": {
"content": "<e22>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32183": {
"content": "<e21>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32184": {
"content": "<e20>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32185": {
"content": "<e19>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32186": {
"content": "<e18>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32187": {
"content": "<e17>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32188": {
"content": "<e16>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32189": {
"content": "<e15>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32190": {
"content": "<e14>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32191": {
"content": "<e13>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32192": {
"content": "<e12>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32193": {
"content": "<e11>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32194": {
"content": "<e10>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32195": {
"content": "<e9>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32196": {
"content": "<e8>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32197": {
"content": "<e7>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32198": {
"content": "<e6>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32199": {
"content": "<e5>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32200": {
"content": "<e4>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32201": {
"content": "<e3>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32202": {
"content": "<e2>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32203": {
"content": "<e1>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32204": {
"content": "<e0>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32205": {
"content": "<msg>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"32206": {
"content": "<en>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"32207": {
"content": "<python>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"32208": {
"content": "<java>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"32209": {
"content": "<javascript>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"32210": {
"content": "<ruby>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"32211": {
"content": "<php>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"32212": {
"content": "<go>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"32213": {
"content": "<c>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"32214": {
"content": "<c_sharp>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"32215": {
"content": "<c_plus_plus>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"additional_special_tokens": [
"<en>",
"<python>",
"<java>",
"<javascript>",
"<ruby>",
"<php>",
"<go>",
"<c>",
"<c_sharp>",
"<c_plus_plus>"
],
"bos_token": "<s>",
"clean_up_tokenization_spaces": true,
"cls_token": "<s>",
"eos_token": "</s>",
"errors": "replace",
"mask_token": "<mask>",
"model_max_length": 512,
"pad_token": "<pad>",
"sep_token": "</s>",
"tokenizer_class": "RobertaTokenizer",
"trim_offsets": true,
"unk_token": "<unk>"
}