echarlaix HF Staff committed on
Commit
aa30d41
·
1 Parent(s): 749347e

update tokenizers

Browse files
Files changed (2) hide show
  1. tokenizer.json +2 -2
  2. tokenizer_config.json +5 -19
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40ca9f2afc67c230cfe5513fe038e3538a2f111cccf5c5aac82db24b63fbb884
3
- size 16464024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:358f1c052643d5f9f57704c5980b35fe94e94ccd8f007300a97d92360c4e3673
3
+ size 7319011
tokenizer_config.json CHANGED
@@ -1,29 +1,15 @@
1
  {
2
- "add_bos_token": true,
3
- "add_eos_token": false,
4
- "bos_token": {
5
- "__type": "AddedToken",
6
- "content": "|ENDOFTEXT|",
7
- "lstrip": false,
8
- "normalized": true,
9
- "rstrip": false,
10
- "single_word": false
11
- },
12
  "clean_up_tokenization_spaces": false,
13
  "eos_token": null,
14
- "legacy": null,
15
  "model_max_length": 512,
16
  "pad_token": null,
17
  "sp_model_kwargs": {},
18
  "spaces_between_special_tokens": false,
19
  "tokenizer_class": "LlamaTokenizer",
20
- "unk_token": {
21
- "__type": "AddedToken",
22
- "content": "<unk>",
23
- "lstrip": false,
24
- "normalized": true,
25
- "rstrip": false,
26
- "single_word": false
27
- },
28
  "use_default_system_prompt": true
29
  }
 
1
  {
2
+ "add_prefix_space": null,
3
+ "backend": "tokenizers",
4
+ "bos_token": "|ENDOFTEXT|",
 
 
 
 
 
 
 
5
  "clean_up_tokenization_spaces": false,
6
  "eos_token": null,
7
+ "is_local": false,
8
  "model_max_length": 512,
9
  "pad_token": null,
10
  "sp_model_kwargs": {},
11
  "spaces_between_special_tokens": false,
12
  "tokenizer_class": "LlamaTokenizer",
13
+ "unk_token": "<unk>",
 
 
 
 
 
 
 
14
  "use_default_system_prompt": true
15
  }