adamjweintraut committed
Commit cff8dfc (parent: 44da782)

Upload tokenizer

Files changed:
- README.md (+1, -1)
- added_tokens.json (+4, -1)
- special_tokens_map.json (+8, -2)
- tokenizer_config.json (+29, -5)
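For context, a change set like this is what transformers serializes when new tokens are registered locally and pushed. A minimal sketch, assuming the repo id adamjweintraut/bart-finetuned-lyrlen-128-tokens (committer namespace plus the model name from the README below):

```python
from transformers import AutoTokenizer

# Assumed repo id: committer namespace + model name from the README.
REPO = "adamjweintraut/bart-finetuned-lyrlen-128-tokens"

tok = AutoTokenizer.from_pretrained(REPO)

# Register the four new length-control tokens (len_21 .. len_24).
# add_tokens() skips tokens already in the vocabulary and returns
# the number actually added.
added = tok.add_tokens([f"len_{i}" for i in range(21, 25)])
print(f"added {added} tokens")

# push_to_hub() serializes and commits added_tokens.json,
# special_tokens_map.json and tokenizer_config.json, producing a
# change set like the one below.
tok.push_to_hub(REPO, commit_message="Upload tokenizer")
```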
README.md CHANGED
@@ -1,8 +1,8 @@
 ---
 license: apache-2.0
-base_model: facebook/bart-large
 tags:
 - generated_from_trainer
+base_model: facebook/bart-large
 model-index:
 - name: bart-finetuned-lyrlen-128-tokens
   results: []
added_tokens.json CHANGED
@@ -1,5 +1,4 @@
 {
-  "<P>": 50285,
   "len_1": 50265,
   "len_10": 50274,
   "len_11": 50275,
@@ -13,6 +12,10 @@
   "len_19": 50283,
   "len_2": 50266,
   "len_20": 50284,
+  "len_21": 50285,
+  "len_22": 50286,
+  "len_23": 50287,
+  "len_24": 50288,
   "len_3": 50267,
   "len_4": 50268,
   "len_5": 50269,
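Net effect: id 50285 is reassigned from <P> to len_21, and len_22 through len_24 extend the vocabulary to 50289 entries (50265 base BART ids plus 24 length tokens). A quick sanity check, repo id assumed as above:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("adamjweintraut/bart-finetuned-lyrlen-128-tokens")

# len_21 .. len_24 should occupy ids 50285 .. 50288, with len_21
# taking over the slot previously held by <P>.
for i in range(21, 25):
    print(f"len_{i} ->", tok.convert_tokens_to_ids(f"len_{i}"))

# 50265 base ids + 24 length tokens = 50289
print(len(tok))
```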
special_tokens_map.json CHANGED
@@ -13,7 +13,13 @@
     "rstrip": false,
     "single_word": false
   },
-  "eos_token":
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
   "mask_token": {
     "content": "<mask>",
     "lstrip": true,
@@ -21,7 +27,7 @@
     "rstrip": false,
     "single_word": false
   },
-  "pad_token": "
+  "pad_token": "</s>",
   "sep_token": {
     "content": "</s>",
     "lstrip": false,
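Two things change here: eos_token is now serialized as a full AddedToken mapping, which pins its lstrip/rstrip/normalized behavior instead of relying on defaults, and pad_token now points at </s>. A sketch of setting the same state programmatically (AddedToken is re-exported by transformers):

```python
from transformers import AddedToken, AutoTokenizer

tok = AutoTokenizer.from_pretrained("facebook/bart-large")

# Pin the eos token's handling explicitly; these flags mirror the
# serialized mapping in the diff above.
tok.eos_token = AddedToken("</s>", lstrip=False, rstrip=False,
                           normalized=True, single_word=False)

# Reuse the eos string for padding.
tok.pad_token = tok.eos_token
```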
tokenizer_config.json CHANGED
@@ -202,22 +202,46 @@
       "special": false
     },
     "50285": {
-      "content": "<P>",
+      "content": "len_21",
       "lstrip": false,
-      "normalized": false,
+      "normalized": true,
       "rstrip": false,
       "single_word": false,
-      "special": true
+      "special": false
+    },
+    "50286": {
+      "content": "len_22",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50287": {
+      "content": "len_23",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50288": {
+      "content": "len_24",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
     }
   },
   "bos_token": "<s>",
   "clean_up_tokenization_spaces": true,
   "cls_token": "<s>",
-  "eos_token": "
+  "eos_token": "</s>",
   "errors": "replace",
   "mask_token": "<mask>",
   "model_max_length": 1024,
-  "pad_token": "
+  "pad_token": "</s>",
   "sep_token": "</s>",
   "tokenizer_class": "BartTokenizer",
   "unk_token": "<unk>"
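To reproduce exactly this tokenizer state later, the commit hash can be pinned at load time; a sketch, repo id assumed as above:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained(
    "adamjweintraut/bart-finetuned-lyrlen-128-tokens",
    # This commit (short hash; use the full sha if your hub version
    # requires it).
    revision="cff8dfc",
)

# Both eos and pad resolve to </s> after this commit.
assert tok.eos_token == "</s>" and tok.pad_token == "</s>"
print(tok.convert_tokens_to_ids("len_24"))  # expected: 50288
```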