jeongmi committed on
Commit
4e7c649
1 Parent(s): f1d76ce

Upload tokenizer

Browse files
Files changed (2) hide show
  1. special_tokens_map.json +7 -1
  2. tokenizer_config.json +7 -0
special_tokens_map.json CHANGED
@@ -13,7 +13,13 @@
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
- "pad_token": "</s>",
 
 
 
 
 
 
17
  "unk_token": {
18
  "content": "<unk>",
19
  "lstrip": false,
 
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
+ "pad_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
  "unk_token": {
24
  "content": "<unk>",
25
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -3960,12 +3960,19 @@
3960
  "clean_up_tokenization_spaces": false,
3961
  "eos_token": "</s>",
3962
  "fast": true,
 
3963
  "model_max_length": 1024,
 
3964
  "pad_token": "</s>",
 
 
3965
  "padding_size": "right",
3966
  "sp_model_kwargs": {},
3967
  "spaces_between_special_tokens": false,
 
3968
  "tokenizer_class": "LlamaTokenizer",
 
 
3969
  "unk_token": "<unk>",
3970
  "use_default_system_prompt": true
3971
  }
 
3960
  "clean_up_tokenization_spaces": false,
3961
  "eos_token": "</s>",
3962
  "fast": true,
3963
+ "max_length": 1024,
3964
  "model_max_length": 1024,
3965
+ "pad_to_multiple_of": null,
3966
  "pad_token": "</s>",
3967
+ "pad_token_type_id": 0,
3968
+ "padding_side": "left",
3969
  "padding_size": "right",
3970
  "sp_model_kwargs": {},
3971
  "spaces_between_special_tokens": false,
3972
+ "stride": 0,
3973
  "tokenizer_class": "LlamaTokenizer",
3974
+ "truncation_side": "right",
3975
+ "truncation_strategy": "longest_first",
3976
  "unk_token": "<unk>",
3977
  "use_default_system_prompt": true
3978
  }