plison committed on
Commit
e6e5df3
1 Parent(s): 888ad1c

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +2 -2
  2. tokenizer_config.json +4 -0
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7da53ca29fb16f6b2489482fc0bc6a394162cdab14d12764a1755ebc583fea79
3
- size 17518525
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95bcd63551d6a3ed47b5cb623b034ce28c7d19d84441b84f2b84ad1120f01e47
3
+ size 17518624
tokenizer_config.json CHANGED
@@ -1747,11 +1747,15 @@
1747
  "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}",
1748
  "clean_up_tokenization_spaces": false,
1749
  "eos_token": "<eos>",
 
1750
  "model_max_length": 1000000000000000019884624838656,
1751
  "pad_token": "<pad>",
1752
  "sp_model_kwargs": {},
1753
  "spaces_between_special_tokens": false,
 
1754
  "tokenizer_class": "GemmaTokenizer",
 
 
1755
  "unk_token": "<unk>",
1756
  "use_default_system_prompt": false
1757
  }
 
1747
  "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}",
1748
  "clean_up_tokenization_spaces": false,
1749
  "eos_token": "<eos>",
1750
+ "max_length": 3800,
1751
  "model_max_length": 1000000000000000019884624838656,
1752
  "pad_token": "<pad>",
1753
  "sp_model_kwargs": {},
1754
  "spaces_between_special_tokens": false,
1755
+ "stride": 0,
1756
  "tokenizer_class": "GemmaTokenizer",
1757
+ "truncation_side": "right",
1758
+ "truncation_strategy": "longest_first",
1759
  "unk_token": "<unk>",
1760
  "use_default_system_prompt": false
1761
  }