ezellm-lite-tokenizer / tokenizer_config.json
TerminatorPower's picture
Upload ezellm-lite tokenizer (24,600 vocab, FIM + repo specials)
36f1b8c verified
{
"backend": "tokenizers",
"tokenizer_class": "TokenizersBackend",
"model_max_length": 1e+30,
"clean_up_tokenization_spaces": false,
"bos_token": null,
"eos_token": "<|endoftext|>",
"unk_token": null,
"pad_token": "<|endoftext|>",
"added_tokens_decoder": {
"24576": {
"content": "<|endoftext|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"24577": {
"content": "<|fim_prefix|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"24578": {
"content": "<|fim_middle|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"24579": {
"content": "<|fim_suffix|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"24580": {
"content": "<|fim_pad|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"24581": {
"content": "<|file_sep|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"24582": {
"content": "<|repo_name|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"24583": {
"content": "<|filename|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"24584": {
"content": "<|reserved_0|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"24585": {
"content": "<|reserved_1|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"24586": {
"content": "<|reserved_2|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"24587": {
"content": "<|reserved_3|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"24588": {
"content": "<|reserved_4|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"24589": {
"content": "<|reserved_5|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"24590": {
"content": "<|reserved_6|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"24591": {
"content": "<|reserved_7|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"24592": {
"content": "<|reserved_8|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"24593": {
"content": "<|reserved_9|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"24594": {
"content": "<|reserved_10|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"24595": {
"content": "<|reserved_11|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"24596": {
"content": "<|reserved_12|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"24597": {
"content": "<|reserved_13|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"24598": {
"content": "<|reserved_14|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"24599": {
"content": "<|reserved_15|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
}
}