gptneo-125m-en-quatrain-conditioned / tokenizer_config.json
{
"add_bos_token": false,
"add_prefix_space": false,
"added_tokens_decoder": {
"50256": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": true
},
"50257": {
"content": "<PAD>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50258": {
"content": "<quatrain>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50259": {
"content": "</quatrain>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50260": {
"content": "[ABCD]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50261": {
"content": "[AABC]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50262": {
"content": "[ABAC]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50263": {
"content": "[ABCC]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50264": {
"content": "[ABBA]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50265": {
"content": "[ABAB]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50266": {
"content": "[ABCB]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50267": {
"content": "[ABBC]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50268": {
"content": "[ABBB]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50269": {
"content": "[AABA]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50270": {
"content": "[AABB]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50271": {
"content": "[AAAB]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50272": {
"content": "[ABCA]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50273": {
"content": "[AAAA]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50274": {
"content": "[ABAA]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50275": {
"content": "<iambus>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50276": {
"content": "<trochee>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50277": {
"content": "<anapaest>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50278": {
"content": "<dactyl>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50279": {
"content": "<other>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50280": {
"content": "<amphibrach>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"50281": {
"content": "<alexandrine>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"additional_special_tokens": [
"<PAD>",
"<quatrain>",
"</quatrain>",
"[ABCD]",
"[AABC]",
"[ABAC]",
"[ABCC]",
"[ABBA]",
"[ABAB]",
"[ABCB]",
"[ABBC]",
"[ABBB]",
"[AABA]",
"[AABB]",
"[AAAB]",
"[ABCA]",
"[AAAA]",
"[ABAA]",
"<iambus>",
"<trochee>",
"<anapaest>",
"<dactyl>",
"<other>",
"<amphibrach>",
"<alexandrine>"
],
"bos_token": "<|endoftext|>",
"clean_up_tokenization_spaces": true,
"eos_token": "<|endoftext|>",
"errors": "replace",
"model_max_length": 2048,
"pad_token": "<PAD>",
"tokenizer_class": "GPT2Tokenizer",
"unk_token": "<|endoftext|>"
}
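Below is a minimal usage sketch for this config. It assumes the repo id is "cyr19/gptneo-125m-en-quatrain-conditioned" and that a conditioned prompt is built by concatenating a rhyme-scheme token, a meter token, and the <quatrain> opener; both the repo path and the exact prompt ordering are assumptions, not confirmed by this file.

from transformers import AutoTokenizer, AutoModelForCausalLM

# Hypothetical repo id; adjust to the actual model path if it differs.
repo_id = "cyr19/gptneo-125m-en-quatrain-conditioned"

tokenizer = AutoTokenizer.from_pretrained(repo_id)

# The config registers <quatrain>/</quatrain>, the rhyme-scheme tokens ([ABAB], ...)
# and the meter tokens (<iambus>, ...) as special tokens, so each one maps to a
# single id in the 50257-50281 range added on top of the GPT-2 base vocabulary.
prompt = "[ABAB]<iambus><quatrain>"   # assumed conditioning format
ids = tokenizer(prompt)["input_ids"]
print(ids)
print(tokenizer.convert_ids_to_tokens(ids))

# Optional generation sketch: pad_token is <PAD> (id 50257), so pass it explicitly
# to generate() to avoid the "no pad token" warning.
model = AutoModelForCausalLM.from_pretrained(repo_id)
inputs = tokenizer(prompt, return_tensors="pt")
out = model.generate(
    **inputs,
    max_new_tokens=80,
    do_sample=True,
    pad_token_id=tokenizer.pad_token_id,
)
print(tokenizer.decode(out[0], skip_special_tokens=False))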