thhaus committed
Commit 7a6293e · verified · 1 Parent(s): 761c291

Upload tokenizer

special_tokens_map.json CHANGED
@@ -12,5 +12,12 @@
     "normalized": false,
     "rstrip": false,
     "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
   }
 }
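
The only change here is a new "unk_token" entry in the special-tokens map. A minimal sketch to confirm it is picked up after this commit, assuming transformers is installed; "user/model" is a placeholder repo id, not the actual repository name:

    from transformers import AutoTokenizer

    # "user/model" is a hypothetical repo id -- substitute the real one.
    # Pinning revision="7a6293e" loads the tokenizer exactly as committed here.
    tok = AutoTokenizer.from_pretrained("user/model", revision="7a6293e")

    print(tok.unk_token)           # expected: "<unk>"
    print(tok.special_tokens_map)  # should now include "unk_token": "<unk>"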
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a542c966524e01dc58f69ba95a3779862899703104e416a6ccacd28ea16d6a64
-size 34809697
+oid sha256:e27b8cc9cb1c7cdab5d45b0c8d027fd6e812afe70496917fc33bb336a79a346d
+size 34810700
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6dfd8b970f437002fc445214304969fe59e64d4f48500bd0b77ba55340f2d811
+size 4545602
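
Both tokenizer.json and the newly added tokenizer.model are tracked with Git LFS, so the diff shows only the updated pointer files (oid and size), not the binary payloads. A sketch of fetching the actual blob at this commit with huggingface_hub (again, the repo id is a placeholder):

    from huggingface_hub import hf_hub_download

    # Hypothetical repo id; revision pins the download to this commit,
    # so the pointer above resolves to the ~4.5 MB SentencePiece model.
    path = hf_hub_download("user/model", "tokenizer.model", revision="7a6293e")
    print(path)  # local cache path of the downloaded file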
tokenizer_config.json CHANGED
@@ -1,5 +1,24 @@
 {
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": true,
   "added_tokens_decoder": {
+    "0": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
     "2": {
       "content": "<s>",
       "lstrip": false,
@@ -8015,23 +8034,17 @@
       "rstrip": false,
       "single_word": false,
       "special": false
-    },
-    "255999": {
-      "content": "<pad>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
     }
   },
   "bos_token": "<s>",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
-  "model_input_names": [
-    "input_ids",
-    "attention_mask"
-  ],
+  "legacy": false,
   "model_max_length": 1000000000000000019884624838656,
-  "tokenizer_class": "PreTrainedTokenizerFast"
+  "pad_token": null,
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
 }
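
Taken together, this commit moves the repo from a fast-only setup ("tokenizer_class": "PreTrainedTokenizerFast") to a SentencePiece-backed "LlamaTokenizer": <pad> and <unk> now sit at ids 0 and 1, the old <pad> at id 255999 is removed, and "pad_token" is explicitly null. A minimal sketch of what loading should look like afterwards (placeholder repo id; the slow path additionally requires the sentencepiece package):

    from transformers import AutoTokenizer

    # Hypothetical repo id. use_fast=False exercises the new
    # "tokenizer_class": "LlamaTokenizer" / tokenizer.model path.
    tok = AutoTokenizer.from_pretrained("user/model", revision="7a6293e",
                                        use_fast=False)

    print(type(tok).__name__)                    # LlamaTokenizer
    print(tok.convert_ids_to_tokens([0, 1, 2]))  # ['<pad>', '<unk>', '<s>']
    print(tok.pad_token)                         # None ("pad_token": null)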