adamjweintraut committed
Commit cff8dfc
1 Parent: 44da782

Upload tokenizer

README.md CHANGED
@@ -1,8 +1,8 @@
 ---
 license: apache-2.0
-base_model: facebook/bart-large
 tags:
 - generated_from_trainer
+base_model: facebook/bart-large
 model-index:
 - name: bart-finetuned-lyrlen-128-tokens
   results: []
added_tokens.json CHANGED
@@ -1,5 +1,4 @@
 {
-  "<P>": 50285,
   "len_1": 50265,
   "len_10": 50274,
   "len_11": 50275,
@@ -13,6 +12,10 @@
   "len_19": 50283,
   "len_2": 50266,
   "len_20": 50284,
+  "len_21": 50285,
+  "len_22": 50286,
+  "len_23": 50287,
+  "len_24": 50288,
   "len_3": 50267,
   "len_4": 50268,
   "len_5": 50269,
special_tokens_map.json CHANGED
@@ -13,7 +13,13 @@
     "rstrip": false,
     "single_word": false
   },
-  "eos_token": "<P>",
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
   "mask_token": {
     "content": "<mask>",
     "lstrip": true,
@@ -21,7 +27,7 @@
     "rstrip": false,
     "single_word": false
   },
-  "pad_token": "<P>",
+  "pad_token": "</s>",
   "sep_token": {
     "content": "</s>",
     "lstrip": false,
tokenizer_config.json CHANGED
@@ -202,22 +202,46 @@
       "special": false
     },
     "50285": {
-      "content": "<P>",
+      "content": "len_21",
       "lstrip": false,
-      "normalized": false,
+      "normalized": true,
       "rstrip": false,
       "single_word": false,
-      "special": true
+      "special": false
+    },
+    "50286": {
+      "content": "len_22",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50287": {
+      "content": "len_23",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50288": {
+      "content": "len_24",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
     }
   },
   "bos_token": "<s>",
   "clean_up_tokenization_spaces": true,
   "cls_token": "<s>",
-  "eos_token": "<P>",
+  "eos_token": "</s>",
   "errors": "replace",
   "mask_token": "<mask>",
   "model_max_length": 1024,
-  "pad_token": "<P>",
+  "pad_token": "</s>",
   "sep_token": "</s>",
   "tokenizer_class": "BartTokenizer",
   "unk_token": "<unk>"