unsubscribe committed on
Commit
6a2e2d5
1 Parent(s): 8253e76

Update special tokens (#1)

Browse files

- Update special tokens (15396abf87c3a1a17c813de12862353d55712578)

Files changed (1) hide show
  1. tokenizer_config.json +62 -14
tokenizer_config.json CHANGED
@@ -1,4 +1,17 @@
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "added_tokens_decoder": {
3
  "0": {
4
  "content": "<unk>",
@@ -23,20 +36,55 @@
23
  "rstrip": false,
24
  "single_word": false,
25
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  }
27
  },
28
- "additional_special_tokens": [],
29
- "auto_map": {
30
- "AutoTokenizer": [
31
- "tokenization_internlm.InternLMTokenizer",
32
- null
33
- ]
34
- },
35
- "bos_token": "<s>",
36
- "clean_up_tokenization_spaces": false,
37
- "eos_token": "</s>",
38
- "model_max_length": 1000000000000000019884624838656,
39
- "pad_token": "</s>",
40
- "tokenizer_class": "InternLMTokenizer",
41
- "unk_token": "<unk>"
42
  }
 
1
  {
2
+ "auto_map": {
3
+ "AutoTokenizer": [
4
+ "tokenization_internlm.InternLMTokenizer",
5
+ null
6
+ ]
7
+ },
8
+ "bos_token": "<s>",
9
+ "clean_up_tokenization_spaces": false,
10
+ "eos_token": "</s>",
11
+ "model_max_length": 1000000000000000019884624838656,
12
+ "pad_token": "</s>",
13
+ "tokenizer_class": "InternLMTokenizer",
14
+ "unk_token": "<unk>",
15
  "added_tokens_decoder": {
16
  "0": {
17
  "content": "<unk>",
 
36
  "rstrip": false,
37
  "single_word": false,
38
  "special": true
39
+ },
40
+ "92543": {
41
+ "content": "<|im_start|>",
42
+ "lstrip": false,
43
+ "normalized": false,
44
+ "rstrip": false,
45
+ "single_word": false,
46
+ "special": true
47
+ },
48
+ "92542": {
49
+ "content": "<|im_end|>",
50
+ "lstrip": false,
51
+ "normalized": false,
52
+ "rstrip": false,
53
+ "single_word": false,
54
+ "special": true
55
+ },
56
+ "92541": {
57
+ "content": "<|action_start|>",
58
+ "lstrip": false,
59
+ "normalized": false,
60
+ "rstrip": false,
61
+ "single_word": false,
62
+ "special": true
63
+ },
64
+ "92540": {
65
+ "content": "<|action_end|>",
66
+ "lstrip": false,
67
+ "normalized": false,
68
+ "rstrip": false,
69
+ "single_word": false,
70
+ "special": true
71
+ },
72
+ "92539": {
73
+ "content": "<|interpreter|>",
74
+ "lstrip": false,
75
+ "normalized": false,
76
+ "rstrip": false,
77
+ "single_word": false,
78
+ "special": true
79
+ },
80
+ "92538": {
81
+ "content": "<|plugin|>",
82
+ "lstrip": false,
83
+ "normalized": false,
84
+ "rstrip": false,
85
+ "single_word": false,
86
+ "special": true
87
  }
88
  },
89
+ "chat_template": "{{ bos_token }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  }