Locutusque committed on
Commit
f7d7325
1 Parent(s): 9473e24

Upload 3 files

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +4 -0
  2. tokenizer.json +26 -8
  3. tokenizer_config.json +24 -1
special_tokens_map.json CHANGED
@@ -1,4 +1,8 @@
1
  {
 
 
 
 
2
  "bos_token": {
3
  "content": "<|bos|>",
4
  "lstrip": false,
 
1
  {
2
+ "additional_special_tokens": [
3
+ "<|ASSISTANT|>",
4
+ "<|USER|>"
5
+ ],
6
  "bos_token": {
7
  "content": "<|bos|>",
8
  "lstrip": false,
tokenizer.json CHANGED
@@ -2,7 +2,7 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 1536,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
@@ -61,6 +61,24 @@
61
  "rstrip": false,
62
  "normalized": false,
63
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  }
65
  ],
66
  "normalizer": {
@@ -85,7 +103,7 @@
85
  "single": [
86
  {
87
  "SpecialToken": {
88
- "id": "<s>",
89
  "type_id": 0
90
  }
91
  },
@@ -99,7 +117,7 @@
99
  "pair": [
100
  {
101
  "SpecialToken": {
102
- "id": "<s>",
103
  "type_id": 0
104
  }
105
  },
@@ -111,7 +129,7 @@
111
  },
112
  {
113
  "SpecialToken": {
114
- "id": "<s>",
115
  "type_id": 1
116
  }
117
  },
@@ -123,13 +141,13 @@
123
  }
124
  ],
125
  "special_tokens": {
126
- "<s>": {
127
- "id": "<s>",
128
  "ids": [
129
- 1
130
  ],
131
  "tokens": [
132
- "<s>"
133
  ]
134
  }
135
  }
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 512,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
 
61
  "rstrip": false,
62
  "normalized": false,
63
  "special": true
64
+ },
65
+ {
66
+ "id": 32003,
67
+ "content": "<|ASSISTANT|>",
68
+ "single_word": false,
69
+ "lstrip": false,
70
+ "rstrip": false,
71
+ "normalized": false,
72
+ "special": true
73
+ },
74
+ {
75
+ "id": 32004,
76
+ "content": "<|USER|>",
77
+ "single_word": false,
78
+ "lstrip": false,
79
+ "rstrip": false,
80
+ "normalized": false,
81
+ "special": true
82
  }
83
  ],
84
  "normalizer": {
 
103
  "single": [
104
  {
105
  "SpecialToken": {
106
+ "id": "<|bos|>",
107
  "type_id": 0
108
  }
109
  },
 
117
  "pair": [
118
  {
119
  "SpecialToken": {
120
+ "id": "<|bos|>",
121
  "type_id": 0
122
  }
123
  },
 
129
  },
130
  {
131
  "SpecialToken": {
132
+ "id": "<|bos|>",
133
  "type_id": 1
134
  }
135
  },
 
141
  }
142
  ],
143
  "special_tokens": {
144
+ "<|bos|>": {
145
+ "id": "<|bos|>",
146
  "ids": [
147
+ 32000
148
  ],
149
  "tokens": [
150
+ "<|bos|>"
151
  ]
152
  }
153
  }
tokenizer_config.json CHANGED
@@ -47,18 +47,41 @@
47
  "rstrip": false,
48
  "single_word": false,
49
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  }
51
  },
52
- "additional_special_tokens": [],
 
 
 
53
  "bos_token": "<|bos|>",
54
  "clean_up_tokenization_spaces": false,
55
  "eos_token": "<|endoftext|>",
56
  "legacy": true,
 
57
  "model_max_length": 1000000000000000019884624838656,
58
  "pad_token": "[PAD]",
59
  "sp_model_kwargs": {},
60
  "spaces_between_special_tokens": false,
 
61
  "tokenizer_class": "LlamaTokenizer",
 
 
62
  "unk_token": "<unk>",
63
  "use_default_system_prompt": true
64
  }
 
47
  "rstrip": false,
48
  "single_word": false,
49
  "special": true
50
+ },
51
+ "32003": {
52
+ "content": "<|ASSISTANT|>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "32004": {
60
+ "content": "<|USER|>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
  }
67
  },
68
+ "additional_special_tokens": [
69
+ "<|ASSISTANT|>",
70
+ "<|USER|>"
71
+ ],
72
  "bos_token": "<|bos|>",
73
  "clean_up_tokenization_spaces": false,
74
  "eos_token": "<|endoftext|>",
75
  "legacy": true,
76
+ "max_length": 1536,
77
  "model_max_length": 1000000000000000019884624838656,
78
  "pad_token": "[PAD]",
79
  "sp_model_kwargs": {},
80
  "spaces_between_special_tokens": false,
81
+ "stride": 0,
82
  "tokenizer_class": "LlamaTokenizer",
83
+ "truncation_side": "right",
84
+ "truncation_strategy": "longest_first",
85
  "unk_token": "<unk>",
86
  "use_default_system_prompt": true
87
  }