PNAKTEMPORAL committed on
Commit
94f492f
1 Parent(s): ef5d9c2

Upload tokenizer

Browse files
special_tokens_map.json CHANGED
@@ -17,7 +17,6 @@
17
  "<|prefix_end|>",
18
  "<|prompter|>"
19
  ],
20
- "eos_token": "<|endoftext|>",
21
- "pad_token": "<|endoftext|>",
22
  "sep_token": "<|endoftext|>"
23
  }
 
17
  "<|prefix_end|>",
18
  "<|prompter|>"
19
  ],
20
+ "eos_token": "<eos>",
 
21
  "sep_token": "<|endoftext|>"
22
  }
tokenizer.json CHANGED
@@ -1,6 +1,11 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
@@ -155,6 +160,15 @@
155
  "rstrip": false,
156
  "normalized": false,
157
  "special": true
 
 
 
 
 
 
 
 
 
158
  }
159
  ],
160
  "normalizer": null,
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 4096,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
  "padding": null,
10
  "added_tokens": [
11
  {
 
160
  "rstrip": false,
161
  "normalized": false,
162
  "special": true
163
+ },
164
+ {
165
+ "id": 65029,
166
+ "content": "<eos>",
167
+ "single_word": false,
168
+ "lstrip": false,
169
+ "rstrip": false,
170
+ "normalized": false,
171
+ "special": true
172
  }
173
  ],
174
  "normalizer": null,
tokenizer_config.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "add_prefix_space": false,
3
  "clean_up_tokenization_spaces": true,
4
  "eos_token": "<|endoftext|>",
 
1
  {
2
+ "add_eos_token": true,
3
  "add_prefix_space": false,
4
  "clean_up_tokenization_spaces": true,
5
  "eos_token": "<|endoftext|>",