stablelm-2-1_6b-dev / tokenizer_config.json
jon-tow's picture
Update tokenizer_config.json
4c2c450 verified
raw
history blame
874 Bytes
{
"add_prefix_space": false,
"additional_special_tokens": [
"<|endoftext|>",
"<|fim_prefix|>",
"<|fim_middle|>",
"<|fim_suffix|>",
"<|fim_pad|>",
"<gh_stars>",
"<filename>",
"<issue_start>",
"<issue_comment>",
"<issue_closed>",
"<jupyter_start>",
"<jupyter_text>",
"<jupyter_code>",
"<jupyter_output>",
"<empty_output>",
"<commit_before>",
"<commit_msg>",
"<commit_after>",
"<reponame>",
"<|endofprompt|>",
"<|im_start|>",
"<|im_end|>",
"<|pause|>",
"<|reg0|>",
"<|reg1|>",
"<|reg2|>",
"<|reg3|>",
"<|reg4|>",
"<|reg5|>",
"<|reg6|>",
"<|reg7|>",
"<|extra0|>"
],
"bos_token": "<|endoftext|>",
"clean_up_tokenization_spaces": true,
"eos_token": "<|endoftext|>",
"tokenizer_class": "GPT2TokenizerFast",
"unk_token": "<|endoftext|>"
}