llama-hebrew-tokenizer-20k / tokenizer_config.json
Norod78's picture
Upload tokenizer
52e9922
raw
history blame
342 Bytes
{
"additional_special_tokens": [
"<|startofchat|>",
"<|endofchat|>",
"<|human|>",
"<|gpt|>"
],
"bos_token": "</s>",
"clean_up_tokenization_spaces": false,
"eos_token": "</s>",
"model_max_length": 2048,
"pad_token": "[PAD]",
"sp_model_kwargs": {},
"tokenizer_class": "LlamaTokenizer",
"unk_token": "</s>"
}