HugoCasa committed on
Commit d7b3ec8
1 Parent(s): 518a2e1

Upload tokenizer

special_tokens_map.json CHANGED
@@ -1,5 +1,23 @@
 {
-  "bos_token": "<|begin▁of▁sentence|>",
-  "eos_token": "<|end▁of▁sentence|>",
-  "pad_token": "<|end▁of▁sentence|>"
+  "bos_token": {
+    "content": "<|begin▁of▁sentence|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|end▁of▁sentence|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|end▁of▁sentence|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
 }
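
Note: the new format stores each special token as a full AddedToken record rather than a bare string, making the lstrip/rstrip/normalized/single_word flags explicit. A minimal sketch of building the same records with the tokenizers library (the local checkpoint path is an assumption for illustration):

```python
from tokenizers import AddedToken
from transformers import AutoTokenizer

# Same flags as the new special_tokens_map.json: no stripping,
# normalized matching, multi-word content allowed.
bos = AddedToken("<|begin▁of▁sentence|>", lstrip=False, rstrip=False,
                 normalized=True, single_word=False)
eos = AddedToken("<|end▁of▁sentence|>", lstrip=False, rstrip=False,
                 normalized=True, single_word=False)

tokenizer = AutoTokenizer.from_pretrained("./checkpoint")  # hypothetical path
tokenizer.add_special_tokens({"bos_token": bos, "eos_token": eos,
                              "pad_token": eos})
print(tokenizer.special_tokens_map)
```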
tokenizer.json CHANGED
@@ -126,7 +126,7 @@
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false,
+      "normalized": true,
       "special": true
     },
     {
@@ -135,7 +135,7 @@
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false,
+      "normalized": true,
       "special": true
     },
     {
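
Both hunks flip `normalized` from false to true on the BOS and EOS entries of the added-tokens table; in the tokenizers library this flag controls whether a token's content is matched against the normalized rather than the raw input. A quick check that the markers still tokenize to single ids after the change (checkpoint path is an assumption):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./checkpoint")  # hypothetical path
text = "<|begin▁of▁sentence|>hello<|end▁of▁sentence|>"
ids = tokenizer.encode(text, add_special_tokens=False)
# Each marker should come back as a single special token id
# (32013 / 32014 per tokenizer_config.json below).
print(tokenizer.convert_ids_to_tokens(ids))
```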
tokenizer_config.json CHANGED
@@ -1,4 +1,6 @@
 {
+  "add_bos_token": true,
+  "add_eos_token": false,
   "added_tokens_decoder": {
     "32000": {
       "content": "õ",
@@ -107,7 +109,7 @@
     "32013": {
       "content": "<|begin▁of▁sentence|>",
       "lstrip": false,
-      "normalized": false,
+      "normalized": true,
       "rstrip": false,
       "single_word": false,
       "special": true
@@ -115,7 +117,7 @@
     "32014": {
       "content": "<|end▁of▁sentence|>",
       "lstrip": false,
-      "normalized": false,
+      "normalized": true,
       "rstrip": false,
       "single_word": false,
       "special": true
@@ -177,7 +179,6 @@
       "special": false
     }
   },
-  "additional_special_tokens": [],
   "bos_token": "<|begin▁of▁sentence|>",
   "chat_template": "{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{{bos_token}}{%- if not ns.found -%}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' %}\n{{ message['content'] + '\\n\\n' }}\n {%- else %}\n {%- if message['role'] == 'user' %}\n{{'@@ Instruction\\n' + message['content'] + '\\n\\n'}}\n {%- else %}\n{{'@@ Response\\n' + message['content'] + '\\n' + eos_token + '\\n'}}\n {%- endif %}\n {%- endif %}\n{%- endfor %}{% if add_generation_prompt %}{{ '@@ Response\n' }}{% endif %}",
   "clean_up_tokenization_spaces": false,