jojo-ai-mst committed on
Commit
382d260
1 Parent(s): cace94d

Upload tokenizer

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +4 -0
tokenizer_config.json CHANGED
@@ -1747,11 +1747,15 @@
1747
  "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}",
1748
  "clean_up_tokenization_spaces": false,
1749
  "eos_token": "<eos>",
 
1750
  "model_max_length": 1000000000000000019884624838656,
1751
  "pad_token": "<eos>",
1752
  "sp_model_kwargs": {},
1753
  "spaces_between_special_tokens": false,
 
1754
  "tokenizer_class": "GemmaTokenizer",
 
 
1755
  "unk_token": "<unk>",
1756
  "use_default_system_prompt": false
1757
  }
 
1747
  "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}",
1748
  "clean_up_tokenization_spaces": false,
1749
  "eos_token": "<eos>",
1750
+ "max_length": 512,
1751
  "model_max_length": 1000000000000000019884624838656,
1752
  "pad_token": "<eos>",
1753
  "sp_model_kwargs": {},
1754
  "spaces_between_special_tokens": false,
1755
+ "stride": 0,
1756
  "tokenizer_class": "GemmaTokenizer",
1757
+ "truncation_side": "right",
1758
+ "truncation_strategy": "longest_first",
1759
  "unk_token": "<unk>",
1760
  "use_default_system_prompt": false
1761
  }