diff --git "a/tokenizer_config.json" "b/tokenizer_config.json" new file mode 100644--- /dev/null +++ "b/tokenizer_config.json" @@ -0,0 +1,16403 @@ +{ + "add_prefix_space": false, + "added_tokens_decoder": { + "50256": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + }, + "50257": { + "content": "", + "clean_up_tokenization_spaces": true, + "eos_token": "<|endoftext|>", + "model_max_length": 1024, + "tokenizer_class": "GPT2Tokenizer", + "unk_token": "<|endoftext|>" +}