Guilherme34 committed on
Commit
51959b7
1 Parent(s): b6b5ee7

Upload tokenizer

Browse files
special_tokens_map.json CHANGED
@@ -13,7 +13,13 @@
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
- "pad_token": "<|im_end|>",
 
 
 
 
 
 
17
  "unk_token": {
18
  "content": "<unk>",
19
  "lstrip": false,
 
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
+ "pad_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
  "unk_token": {
24
  "content": "<unk>",
25
  "lstrip": false,
tokenizer.json CHANGED
@@ -80,12 +80,6 @@
80
  "id": "A",
81
  "type_id": 0
82
  }
83
- },
84
- {
85
- "SpecialToken": {
86
- "id": "<|im_end|>",
87
- "type_id": 0
88
- }
89
  }
90
  ],
91
  "pair": [
@@ -101,12 +95,6 @@
101
  "type_id": 0
102
  }
103
  },
104
- {
105
- "SpecialToken": {
106
- "id": "<|im_end|>",
107
- "type_id": 0
108
- }
109
- },
110
  {
111
  "SpecialToken": {
112
  "id": "<s>",
@@ -118,12 +106,6 @@
118
  "id": "B",
119
  "type_id": 1
120
  }
121
- },
122
- {
123
- "SpecialToken": {
124
- "id": "<|im_end|>",
125
- "type_id": 1
126
- }
127
  }
128
  ],
129
  "special_tokens": {
@@ -135,15 +117,6 @@
135
  "tokens": [
136
  "<s>"
137
  ]
138
- },
139
- "<|im_end|>": {
140
- "id": "<|im_end|>",
141
- "ids": [
142
- 32000
143
- ],
144
- "tokens": [
145
- "<|im_end|>"
146
- ]
147
  }
148
  }
149
  },
 
80
  "id": "A",
81
  "type_id": 0
82
  }
 
 
 
 
 
 
83
  }
84
  ],
85
  "pair": [
 
95
  "type_id": 0
96
  }
97
  },
 
 
 
 
 
 
98
  {
99
  "SpecialToken": {
100
  "id": "<s>",
 
106
  "id": "B",
107
  "type_id": 1
108
  }
 
 
 
 
 
 
109
  }
110
  ],
111
  "special_tokens": {
 
117
  "tokens": [
118
  "<s>"
119
  ]
 
 
 
 
 
 
 
 
 
120
  }
121
  }
122
  },
tokenizer_config.json CHANGED
@@ -1,6 +1,7 @@
1
  {
2
  "add_bos_token": true,
3
- "add_eos_token": true,
 
4
  "added_tokens_decoder": {
5
  "0": {
6
  "content": "<unk>",
@@ -50,8 +51,7 @@
50
  "eos_token": "<|im_end|>",
51
  "legacy": true,
52
  "model_max_length": 1000000000000000019884624838656,
53
- "pad_token": "<|im_end|>",
54
- "padding_side": "left",
55
  "sp_model_kwargs": {},
56
  "spaces_between_special_tokens": false,
57
  "tokenizer_class": "LlamaTokenizer",
 
1
  {
2
  "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": true,
5
  "added_tokens_decoder": {
6
  "0": {
7
  "content": "<unk>",
 
51
  "eos_token": "<|im_end|>",
52
  "legacy": true,
53
  "model_max_length": 1000000000000000019884624838656,
54
+ "pad_token": "</s>",
 
55
  "sp_model_kwargs": {},
56
  "spaces_between_special_tokens": false,
57
  "tokenizer_class": "LlamaTokenizer",