AIFT-42dot_LLM-PLM-1.3B-v1.51 / tokenizer_config.json
DooDooHyun's picture
Upload tokenizer
272598b verified
raw
history blame
9.97 kB
{
"add_bos_token": false,
"add_prefix_space": false,
"added_tokens_decoder": {
"50256": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": true
},
"50257": {
"content": "<||bos||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50258": {
"content": "<||pad||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50259": {
"content": "<||unk||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50260": {
"content": "<||unused1||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50261": {
"content": "<||unused2||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50262": {
"content": "<||unused3||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50263": {
"content": "<||unused4||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50264": {
"content": "<||unused5||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50265": {
"content": "<||unused6||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50266": {
"content": "<||unused7||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50267": {
"content": "<||unused8||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50268": {
"content": "<||unused9||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50269": {
"content": "<||unused10||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50270": {
"content": "<||unused11||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50271": {
"content": "<||unused12||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50272": {
"content": "<||unused13||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50273": {
"content": "<||unused14||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50274": {
"content": "<||unused15||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50275": {
"content": "<||unused16||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50276": {
"content": "<||unused17||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50277": {
"content": "<||unused18||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50278": {
"content": "<||unused19||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50279": {
"content": "<||unused20||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50280": {
"content": "<||unused21||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50281": {
"content": "<||unused22||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50282": {
"content": "<||unused23||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50283": {
"content": "<||unused24||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50284": {
"content": "<||unused25||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50285": {
"content": "<||unused26||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50286": {
"content": "<||unused27||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50287": {
"content": "<||unused28||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50288": {
"content": "<||unused29||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50289": {
"content": "<||unused30||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50290": {
"content": "<||unused31||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50291": {
"content": "<||unused32||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50292": {
"content": "<||unused33||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50293": {
"content": "<||unused34||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50294": {
"content": "<||unused35||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50295": {
"content": "<||unused36||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50296": {
"content": "<||unused37||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50297": {
"content": "<||unused38||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50298": {
"content": "<||unused39||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50299": {
"content": "<||unused40||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50300": {
"content": "<||unused41||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50301": {
"content": "<||unused42||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50302": {
"content": "<||unused43||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50303": {
"content": "<||unused44||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"additional_special_tokens": [
"<||unused1||>",
"<||unused2||>",
"<||unused3||>",
"<||unused4||>",
"<||unused5||>",
"<||unused6||>",
"<||unused7||>",
"<||unused8||>",
"<||unused9||>",
"<||unused10||>",
"<||unused11||>",
"<||unused12||>",
"<||unused13||>",
"<||unused14||>",
"<||unused15||>",
"<||unused16||>",
"<||unused17||>",
"<||unused18||>",
"<||unused19||>",
"<||unused20||>",
"<||unused21||>",
"<||unused22||>",
"<||unused23||>",
"<||unused24||>",
"<||unused25||>",
"<||unused26||>",
"<||unused27||>",
"<||unused28||>",
"<||unused29||>",
"<||unused30||>",
"<||unused31||>",
"<||unused32||>",
"<||unused33||>",
"<||unused34||>",
"<||unused35||>",
"<||unused36||>",
"<||unused37||>",
"<||unused38||>",
"<||unused39||>",
"<||unused40||>",
"<||unused41||>",
"<||unused42||>",
"<||unused43||>",
"<||unused44||>"
],
"bos_token": "<||bos||>",
"clean_up_tokenization_spaces": true,
"eos_token": "<|endoftext|>",
"errors": "replace",
"model_max_length": 8192,
"pad_token": "<||pad||>",
"padding_side": "right",
"tokenizer_class": "GPT2Tokenizer",
"unk_token": "<||unk||>"
}