mahiatlinux committed
Commit 6bf9c2c
Parent: da55a2e

Upload 3 files

Files changed (3):
  1. tokenizer.json +1 -9
  2. tokenizer.model +3 -0
  3. tokenizer_config.json +1 -3
tokenizer.json CHANGED
@@ -1,14 +1,7 @@
 {
   "version": "1.0",
   "truncation": null,
-  "padding": {
-    "strategy": "BatchLongest",
-    "direction": "Left",
-    "pad_to_multiple_of": null,
-    "pad_id": 1,
-    "pad_type_id": 0,
-    "pad_token": "<s>"
-  },
+  "padding": null,
   "added_tokens": [
     {
       "id": 0,
@@ -141,7 +134,6 @@
   "end_of_word_suffix": null,
   "fuse_unk": true,
   "byte_fallback": true,
-  "ignore_merges": false,
   "vocab": {
     "<unk>": 0,
     "<s>": 1,
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc460a0129515b7579ec9f63218012601729de4fbd1b5de8d56dc47e8a204a29
+size 493449
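
The three lines committed here are a Git LFS pointer, not the model itself; the actual SentencePiece file (493,449 bytes, identified by the sha256 oid above) lives in LFS storage and is resolved on download. A minimal sketch of fetching the real file with huggingface_hub, again with a placeholder repo id:

# Minimal sketch: hf_hub_download resolves the LFS pointer and returns a
# local path to the real ~493 KB binary. Repo id is a hypothetical placeholder.
from huggingface_hub import hf_hub_download

path = hf_hub_download(repo_id="mahiatlinux/REPO_NAME", filename="tokenizer.model")
print(path)  # cached path to the SentencePiece model, not the 3-line pointer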
tokenizer_config.json CHANGED
@@ -1,7 +1,6 @@
 {
   "add_bos_token": true,
   "add_eos_token": false,
-  "add_prefix_space": null,
   "added_tokens_decoder": {
     "0": {
       "content": "<unk>",
@@ -32,9 +31,8 @@
   "chat_template": "{% for message in messages %}{% if message['from'] == 'human' %}{{'<|im_start|>user\n' + message['value'] + '<|im_end|>\n'}}{% elif message['from'] == 'gpt' %}{{'<|im_start|>assistant\n' + message['value'] + '<|im_end|>\n' }}{% else %}{{ '<|im_start|>system\n' + message['value'] + '<|im_end|>\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "<|im_end|>",
-  "legacy": true,
   "model_max_length": 1000000000000000019884624838656,
-  "pad_token": "<s>",
+  "pad_token": "<unk>",
   "tokenizer_class": "LlamaTokenizer",
   "unk_token": "<unk>",
   "use_default_system_prompt": false