wenbopan committed on
Commit
823a33e
1 Parent(s): d114d12

Faro tokenizer

Browse files
Files changed (2)
  1. special_tokens_map.json +1 -1
  2. tokenizer_config.json +1 -17
special_tokens_map.json CHANGED
@@ -7,7 +7,7 @@
7
  "single_word": false
8
  },
9
  "eos_token": {
10
- "content": "<|im_end|>",
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
 
7
  "single_word": false
8
  },
9
  "eos_token": {
10
+ "content": "<|endoftext|>",
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
tokenizer_config.json CHANGED
@@ -26,28 +26,12 @@
26
  "rstrip": false,
27
  "single_word": false,
28
  "special": true
29
- },
30
- "6": {
31
- "content": "<|im_start|>",
32
- "lstrip": false,
33
- "normalized": false,
34
- "rstrip": false,
35
- "single_word": false,
36
- "special": false
37
- },
38
- "7": {
39
- "content": "<|im_end|>",
40
- "lstrip": false,
41
- "normalized": false,
42
- "rstrip": false,
43
- "single_word": false,
44
- "special": true
45
  }
46
  },
47
  "bos_token": "<|startoftext|>",
48
  "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
49
  "clean_up_tokenization_spaces": false,
50
- "eos_token": "<|im_end|>",
51
  "legacy": true,
52
  "model_max_length": 4096,
53
  "pad_token": "<unk>",
 
26
  "rstrip": false,
27
  "single_word": false,
28
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  }
30
  },
31
  "bos_token": "<|startoftext|>",
32
  "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
33
  "clean_up_tokenization_spaces": false,
34
+ "eos_token": "<|endoftext|>",
35
  "legacy": true,
36
  "model_max_length": 4096,
37
  "pad_token": "<unk>",